Improve tutorials' output (#1694)

* Modify __str__ and __repr__ for Document and Answer

* Rename QueryClassifier in Tutorial11

* Improve the output of tutorial1

* Make the output of Tutorial8 a bit less dense

* Add a print_questions util to print the output of question generating pipelines

* Replace custom printing with the new utility in Tutorial13

* Ensure all output is printed with minimal details in Tutorial14 and add some titles

* Minor change to print_answers

* Make tutorial3's output the same as tutorial1

* Add __repr__ to Answer and fix to_dict()

* Fix a bug in the Document and Answer's __str__ method

* Improve print_answers, print_documents and print_questions

* Using print_answers in Tutorial7 and fixing typo in the utils

* Remove duplicate line in Tutorial12

* Use print_answers in Tutorial4

* Add explanation of what the documents in the output of the basic QA pipeline are

* Move the fields constant into print_answers

* Normalize all 'minimal' to 'minimum' (they were mixed up)

* Improve the sample output to include all fields from Document and Answer

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Sara Zan 2021-11-09 15:09:26 +01:00 committed by GitHub
parent 861522b6b1
commit 91cafb49bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 484 additions and 184 deletions

View File

@ -237,7 +237,35 @@ prediction = pipe.run(
```python
print_answers(prediction, details="minimal")
# Now you can either print the object directly...
from pprint import pprint
pprint(prediction)
# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night\'s Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }
```
```python
# ...or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print_answers(prediction, details="minimum")
```
## About us

View File

@ -296,7 +296,7 @@ Below, we define a very naive `QueryClassifier` and show how to use it:
```python
class QueryClassifier(BaseComponent):
class CustomQueryClassifier(BaseComponent):
outgoing_edges = 2
def run(self, query: str):
@ -307,7 +307,7 @@ class QueryClassifier(BaseComponent):
# Here we build the pipeline
p_classifier = Pipeline()
p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])

View File

@ -42,7 +42,8 @@ from tqdm import tqdm
from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
from haystack.utils import launch_es
from haystack.utils import launch_es, print_questions
```
Let's start an Elasticsearch instance with one of the options below:
@ -98,9 +99,11 @@ which the document can answer.
```python
question_generation_pipeline = QuestionGenerationPipeline(question_generator)
for document in document_store:
for idx, document in enumerate(document_store):
print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n")
result = question_generation_pipeline.run(documents=[document])
pprint(result)
print_questions(result)
```
## Retriever Question Generation Pipeline
@ -111,8 +114,10 @@ This pipeline takes a query as input. It retrieves relevant documents and then g
```python
retriever = ElasticsearchRetriever(document_store=document_store)
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)
print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n")
result = rqg_pipeline.run(query="Arya Stark")
pprint(result)
print_questions(result)
```
## Question Answer Generation Pipeline
@ -124,9 +129,11 @@ a Reader model
```python
reader = FARMReader("deepset/roberta-base-squad2")
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
for document in tqdm(document_store):
for idx, document in enumerate(tqdm(document_store)):
print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
result = qag_pipeline.run(documents=[document])
pprint(result)
print_questions(result)
```
## About us

View File

@ -161,14 +161,14 @@ res_1 = sklearn_keyword_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Run only the sparse retriever on a keyword based query
res_2 = sklearn_keyword_classifier.run(
query="arya stark father"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")
```
@ -180,14 +180,14 @@ res_3 = sklearn_keyword_classifier.run(
query="which country was jon snow filmed ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")
# Run only the sparse retriever on a keyword based query
res_4 = sklearn_keyword_classifier.run(
query="jon snow country"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
```
@ -197,14 +197,14 @@ res_5 = sklearn_keyword_classifier.run(
query="who are the younger brothers of arya stark ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")
# Run only the sparse retriever on a keyword based query
res_6 = sklearn_keyword_classifier.run(
query="arya stark younger brothers"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
```
## Transformer Keyword vs Question/Statement Classifier
@ -234,14 +234,14 @@ res_1 = transformer_keyword_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Run only the sparse retriever on a keyword based query
res_2 = transformer_keyword_classifier.run(
query="arya stark father"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")
```
@ -253,14 +253,14 @@ res_3 = transformer_keyword_classifier.run(
query="which country was jon snow filmed ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")
# Run only the sparse retriever on a keyword based query
res_4 = transformer_keyword_classifier.run(
query="jon snow country"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
```
@ -270,14 +270,14 @@ res_5 = transformer_keyword_classifier.run(
query="who are the younger brothers of arya stark ?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")
# Run only the sparse retriever on a keyword based query
res_6 = transformer_keyword_classifier.run(
query="arya stark younger brothers"
)
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
```
## Question vs Statement Classifier
@ -305,14 +305,14 @@ res_1 = transformer_question_classifier.run(
query="Who is the father of Arya Stark?"
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Show only DPR results
res_2 = transformer_question_classifier.run(
query="Arya Stark was the daughter of a Lord."
)
print("ES Results" + "\n" + "="*15)
res_2
print_answers(res_2, details="minimum")
```
## Standalone Query Classifier

View File

@ -182,7 +182,34 @@ prediction = pipe.run(
```python
print_answers(prediction, details="minimal")
# Now you can either print the object directly...
from pprint import pprint
pprint(prediction)
# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night\'s Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }
```
```python
# ...or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print_answers(prediction, details="minimum")
```
## About us

View File

@ -155,12 +155,10 @@ pipe = FAQPipeline(retriever=retriever)
```python
from haystack.utils import print_answers
prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
for a in prediction["answers"]:
print(f"Answer: {a.answer}")
print(f"Question: {a.meta['query']}")
print(f"Score: {a.score}")
print("---------------------")
print_answers(prediction, details="medium")
```
## About us

View File

@ -193,11 +193,12 @@ for question in QUESTIONS:
```python
# Or alternatively use the Pipeline class
from haystack.pipelines import GenerativeQAPipeline
from haystack.utils import print_answers
pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
for question in QUESTIONS:
res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}})
print(res)
print_answers(res, details="minimum")
```
## About us

View File

@ -186,10 +186,13 @@ class Document:
getattr(other, 'id_hash_keys', None) == self.id_hash_keys)
def __repr__(self):
return str(self.to_dict())
return f"<Document: {str(self.to_dict())}>"
def __str__(self):
return f"content: {self.content[:100]} {'[...]' if len(self.content) > 100 else ''}"
# In some cases, self.content is None (therefore not subscriptable)
if not self.content:
return f"<Document: id={self.id}, content=None>"
return f"<Document: id={self.id}, content='{self.content[:100]} {'...' if len(self.content) > 100 else ''}'>"
def __lt__(self, other):
""" Enable sorting of Documents by score """
@ -262,7 +265,13 @@ class Answer:
return self.score < other.score
def __str__(self):
return f"answer: {self.answer} \nscore: {self.score} \ncontext: {self.context}"
# self.context might be None (therefore not subscriptable)
if not self.context:
return f"<Answer: answer='{self.answer}', score={self.score}, context=None>"
return f"<Answer: answer='{self.answer}', score={self.score}, context='{self.context[:50]}{'...' if len(self.context) > 50 else ''}'>"
def __repr__(self):
return f"<Answer {asdict(self)}>"
def to_dict(self):
return asdict(self)

View File

@ -16,6 +16,7 @@ from haystack.utils.doc_store import (
from haystack.utils.export_utils import (
print_answers,
print_documents,
print_questions,
export_answers_to_csv,
convert_labels_to_squad,
)

View File

@ -1,12 +1,8 @@
from typing import Dict, Any, List, Optional
import io
import re
import time
import json
import pprint
import logging
import subprocess
import pandas as pd
from collections import defaultdict
@ -16,58 +12,98 @@ from haystack.document_stores.sql import DocumentORM
logger = logging.getLogger(__name__)
def print_answers(results: dict, details: str = "all"):
def print_answers(results: dict, details: str = "all", max_text_len: Optional[int] = None):
"""
Utilitiy function to print results of Haystack pipelines
Utility function to print results of Haystack pipelines
:param results: Results from a pipeline
:param details: One of ["minimum", "medium", "all]. Defining the level of details to print.
:param details: One of "minimum", "medium", "all". Defining the level of details to print.
:param max_text_len: shorten lengthy text fields to the maximum allowed length. Set to
None to not cut long text.
:return: None
"""
# TODO: unify the output format of Generator and Reader so that this function doesn't have the try/except
# Or implement a class method like PredReader.print() and PredGenerator.print() that handles all this functionality.
# This default case is when the answers come from a Reader
try:
answers = results["answers"]
pp = pprint.PrettyPrinter(indent=4)
if details in ("minimal", "medium"):
if details == "minimal":
keys_to_keep = set(["answer", "context"])
elif details == "medium":
keys_to_keep = set(["answer", "context", "score"])
# Defines the fields to keep in the Answer for each detail level
fields_to_keep_by_level = {
"minimum": ["answer", "context"],
"medium": ["answer", "context", "score"]
}
# filter the results
filtered_answers = []
for ans in answers:
filtered_answers.append({k: getattr(ans, k) for k in keys_to_keep})
pp.pprint(filtered_answers)
else:
pp.pprint(results)
# This fall back case is when the answers come from a Generator
except:
if details == "minimal":
print(f"Query: {results['query']}")
for a in results["answers"]:
print(f"Answer: {a['answer']}")
else:
pp.pprint(results)
if not "answers" in results.keys():
raise ValueError("The results object does not seem to come from a Reader: "
f"it does not contain the 'answers' key, but only: {results.keys()}. "
"Try print_documents or print_questions.")
if "query" in results.keys():
print(f"\nQuery: {results['query']}\nAnswers:")
def print_documents(results: dict, max_text_len: Optional[int] = None, print_meta: bool = False):
print(f"Query: {results['query']}")
answers = results["answers"]
pp = pprint.PrettyPrinter(indent=4)
for d in results["documents"]:
print()
new_text = d.content[:max_text_len]
if len(new_text) != len(d.content):
new_text += "..."
results = {
"name": d.meta.get("name", None),
"content": new_text
}
# Filter the results by detail level
filtered_answers = []
if details in fields_to_keep_by_level.keys():
for ans in answers:
filtered_answers.append({k: getattr(ans, k) for k in fields_to_keep_by_level[details]})
elif details == "all":
filtered_answers = answers
else:
logging.warn(f"print_answers received details='{details}', which was not understood. "
"Valid values are 'minimum', 'medium', and 'all'. Using 'all'.")
filtered_answers = answers
# Shorten long text fields
if max_text_len is not None:
for ans in answers:
if "context" in ans.keys() and len(ans["context"]) > 50:
ans["context"] = ans["context"][:50] + "..."
pp.pprint(filtered_answers)
def print_documents(results: dict, max_text_len: Optional[int] = None, print_name: bool = True, print_meta: bool = False):
    """
    Utility that prints a compressed representation of the documents returned by a pipeline.

    :param results: Output of a pipeline. Must contain the keys 'query' and 'documents';
                    each document is read through its `content` and `meta` attributes.
    :param max_text_len: shorten the document's content to a maximum number of chars. If None, does not cut.
    :param print_name: whether to print the document's name (from the metadata) or not.
    :param print_meta: whether to print the document's metadata or not.
    :return: None
    """
    print(f"\nQuery: {results['query']}\n")
    pp = pprint.PrettyPrinter(indent=4)

    for doc in results["documents"]:
        content = doc.content
        if max_text_len:
            # Only append the ellipsis when something was actually cut off
            content = doc.content[:max_text_len] + ("..." if len(doc.content) > max_text_len else "")

        # Build the per-document summary under its own name: rebinding `results`
        # here would shadow the function's parameter while it is being iterated.
        summary = {"content": content}
        if print_name:
            summary["name"] = doc.meta.get("name", None)
        if print_meta:
            summary["meta"] = doc.meta

        pp.pprint(summary)
        print()
def print_questions(results: dict):
    """
    Utility to print the output of a question generating pipeline in a readable format.

    :param results: Output of a question generating pipeline. Either contains the key
                    'generated_questions' (a list of dicts, each holding a 'questions' list)
                    or the key 'results' (a list of dicts, each holding a 'query' and a list
                    of 'answers' whose items expose an `answer` attribute).
    :raises ValueError: if `results` contains neither 'generated_questions' nor 'results'.
    :return: None
    """
    if "generated_questions" in results:
        print("\nGenerated questions:")
        for result in results["generated_questions"]:
            for question in result["questions"]:
                print(f" - {question}")

    elif "results" in results:
        print("\nGenerated pairs:")
        for pair in results["results"]:
            print(f" - Q:{pair['query']}")
            for answer in pair["answers"]:
                print(f" A: {answer.answer}")

    else:
        # list(results) shows the plain key names instead of the dict_keys(...) wrapper
        raise ValueError("This object does not seem to be the output "
                         "of a question generating pipeline: it contains neither "
                         f"'generated_questions' nor 'results', but only: {list(results)}. "
                         "Try `print_answers` or `print_documents`.")
def export_answers_to_csv(agg_results: list, output_file):

View File

@ -547,7 +547,7 @@
"cell_type": "code",
"execution_count": null,
"source": [
"class QueryClassifier(BaseComponent):\n",
"class CustomQueryClassifier(BaseComponent):\n",
" outgoing_edges = 2\n",
"\n",
" def run(self, query: str):\n",
@ -558,7 +558,7 @@
"\n",
"# Here we build the pipeline\n",
"p_classifier = Pipeline()\n",
"p_classifier.add_node(component=QueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n",
"p_classifier.add_node(component=CustomQueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n",
"p_classifier.add_node(component=es_retriever, name=\"ESRetriever\", inputs=[\"QueryClassifier.output_1\"])\n",
"p_classifier.add_node(component=dpr_retriever, name=\"DPRRetriever\", inputs=[\"QueryClassifier.output_2\"])\n",
"p_classifier.add_node(component=reader, name=\"QAReader\", inputs=[\"ESRetriever\", \"DPRRetriever\"])\n",

View File

@ -2,7 +2,7 @@ from haystack.utils import clean_wiki_text, print_answers, print_documents, fetc
from pprint import pprint
from haystack import Pipeline
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, JoinDocuments
from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, BaseComponent, JoinDocuments
from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline, GenerativeQAPipeline
@ -35,33 +35,44 @@ def tutorial11_pipelines():
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
######################
# Prebuilt Pipelines #
######################
print()
print("######################")
print("# Prebuilt Pipelines #")
print("######################")
# Extractive QA Pipeline
########################
print()
print("# Extractive QA Pipeline")
print("########################")
query="Who is the father of Arya Stark?"
p_extractive_premade = ExtractiveQAPipeline(reader=reader, retriever=es_retriever)
res = p_extractive_premade.run(
query="Who is the father of Arya Stark?",
query=query,
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
)
print_answers(res, details="minimal")
print("\nQuery: ", query)
print("Answers:")
print_answers(res, details="minimum")
# Document Search Pipeline
##########################
print()
print("# Document Search Pipeline")
print("##########################")
query="Who is the father of Arya Stark?"
p_retrieval = DocumentSearchPipeline(es_retriever)
res = p_retrieval.run(
query="Who is the father of Arya Stark?",
query=query,
params={"Retriever": {"top_k": 10}},
)
print()
print_documents(res, max_text_len=200)
# Generator Pipeline
##########################
print()
print("# Generator Pipeline")
print("####################")
# We set this to True so that the document store returns document embeddings
# with each document, this is needed by the Generator
@ -73,11 +84,12 @@ def tutorial11_pipelines():
# Generative QA
p_generator = GenerativeQAPipeline(generator=rag_generator, retriever=dpr_retriever)
res = p_generator.run(
query="Who is the father of Arya Stark?",
query=query,
params={"Retriever": {"top_k": 10}},
)
print_answers(res, details="minimal")
print()
print_answers(res, details="minimum")
# We are setting this to False so that in later pipelines,
# we get a cleaner printout
@ -91,12 +103,14 @@ def tutorial11_pipelines():
p_retrieval.draw("pipeline_retrieval.png")
p_generator.draw("pipeline_generator.png")
####################
# Custom Pipelines #
####################
print()
print("####################")
print("# Custom Pipelines #")
print("####################")
# Extractive QA Pipeline
########################
print()
print("# Extractive QA Pipeline")
print("########################")
# Custom built extractive QA pipeline
p_extractive = Pipeline()
@ -104,16 +118,21 @@ def tutorial11_pipelines():
p_extractive.add_node(component=reader, name="Reader", inputs=["Retriever"])
# Now we can run it
query="Who is the father of Arya Stark?"
res = p_extractive.run(
query="Who is the father of Arya Stark?",
query=query,
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
)
print_answers(res, details="minimal")
print("\nQuery: ", query)
print("Answers:")
print_answers(res, details="minimum")
p_extractive.draw("pipeline_extractive.png")
# Ensembled Retriever Pipeline
##############################
print()
print("# Ensembled Retriever Pipeline")
print("##############################")
# Create ensembled pipeline
p_ensemble = Pipeline()
p_ensemble.add_node(component=es_retriever, name="ESRetriever", inputs=["Query"])
@ -123,22 +142,27 @@ def tutorial11_pipelines():
p_ensemble.draw("pipeline_ensemble.png")
# Run pipeline
query="Who is the father of Arya Stark?"
res = p_ensemble.run(
query="Who is the father of Arya Stark?",
params={"ESRetriever": {"top_k": 5}, "DPRRetriever": {"top_k": 5}},
)
print_answers(res, details="minimal")
print("\nQuery: ", query)
print("Answers:")
print_answers(res, details="minimum")
# Query Classification Pipeline
###############################
print()
print("# Query Classification Pipeline")
print("###############################")
# Decision Nodes help you route your data so that only certain branches of your `Pipeline` are run.
# Though this looks very similar to the ensembled pipeline shown above,
# the key difference is that only one of the retrievers is run for each request.
# By contrast both retrievers are always run in the ensembled approach.
class QueryClassifier():
class CustomQueryClassifier(BaseComponent):
outgoing_edges = 2
def run(self, query):
@ -149,25 +173,32 @@ def tutorial11_pipelines():
# Here we build the pipeline
p_classifier = Pipeline()
p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"])
p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])
p_classifier.draw("pipeline_classifier.png")
# Run only the dense retriever on the full sentence query
query="Who is the father of Arya Stark?"
res_1 = p_classifier.run(
query="Who is the father of Arya Stark?",
query=query,
)
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print()
print("\nQuery: ", query)
print(" * DPR Answers:")
print_answers(res_1, details="minimum")
# Run only the sparse retriever on a keyword based query
query="Arya Stark father"
res_2 = p_classifier.run(
query="Arya Stark father",
query=query,
)
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print()
print("\nQuery: ", query)
print(" * ES Answers:")
print_answers(res_2, details="minimum")
if __name__ == "__main__":

View File

@ -91,7 +91,6 @@ def tutorial12_lfqa():
print(f"Query: {query_2}")
print(f"Answer: {result_2['answers'][0]}")
print()
pipe.run(query=query_2, params={"Retriever": {"top_k": 1}})
if __name__ == "__main__":

View File

@ -66,7 +66,7 @@
"from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader\n",
"from haystack.document_stores import ElasticsearchDocumentStore\n",
"from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline\n",
"from haystack.utils import launch_es"
"from haystack.utils import launch_es, print_questions\n"
],
"outputs": [],
"metadata": {
@ -188,9 +188,11 @@
"execution_count": null,
"source": [
"question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
"for document in document_store:\n",
"for idx, document in enumerate(document_store):\n",
" \n",
" print(f\"\\n * Generating questions for document {idx}: {document.content[:100]}...\\n\")\n",
" result = question_generation_pipeline.run(documents=[document])\n",
" pprint(result)"
" print_questions(result)"
],
"outputs": [],
"metadata": {
@ -220,8 +222,10 @@
"source": [
"retriever = ElasticsearchRetriever(document_store=document_store)\n",
"rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)\n",
"\n",
"print(f\"\\n * Generating questions for documents matching the query 'Arya Stark'\\n\")\n",
"result = rqg_pipeline.run(query=\"Arya Stark\")\n",
"pprint(result)"
"print_questions(result)"
],
"outputs": [],
"metadata": {
@ -252,9 +256,11 @@
"source": [
"reader = FARMReader(\"deepset/roberta-base-squad2\")\n",
"qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)\n",
"for document in tqdm(document_store):\n",
"for idx, document in enumerate(tqdm(document_store)):\n",
"\n",
" print(f\"\\n * Generating questions and answers for document {idx}: {document.content[:100]}...\\n\")\n",
" result = qag_pipeline.run(documents=[document])\n",
" pprint(result)"
" print_questions(result)"
],
"outputs": [],
"metadata": {

View File

@ -3,7 +3,7 @@ from pprint import pprint
from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
from haystack.utils import launch_es
from haystack.utils import launch_es, print_questions
"""
This is a bare bones tutorial showing what is possible with the QuestionGenerator Node which automatically generates
@ -34,20 +34,31 @@ which the document can answer.
"""
# QuestionGenerationPipeline
print("\nQuestionGenerationPipeline")
print("==========================")
question_generation_pipeline = QuestionGenerationPipeline(question_generator)
for document in document_store:
result = question_generation_pipeline.run(documents=[document])
pprint(result)
for idx, document in enumerate(document_store):
print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n")
result = question_generation_pipeline.run(documents=[document])
print_questions(result)
"""
This pipeline takes a query as input. It retrieves relevant documents and then generates questions based on these.
"""
# RetrieverQuestionGenerationPipeline
# Section title: "\n" starts it on a fresh line ("\R" is not a valid escape and
# would print a literal backslash); the underline matches the title length.
print("\nRetrieverQuestionGenerationPipeline")
print("===================================")
retriever = ElasticsearchRetriever(document_store=document_store)
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)
print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n")
result = rqg_pipeline.run(query="Arya Stark")
pprint(result)
print_questions(result)
"""
This pipeline takes a document as input, generates questions on it, and attempts to answer these questions using
@ -55,11 +66,17 @@ a Reader model
"""
# QuestionAnswerGenerationPipeline
# Section title: "\n" starts it on a fresh line ("\Q" is not a valid escape and
# would print a literal backslash); the underline matches the title length.
print("\nQuestionAnswerGenerationPipeline")
print("================================")
reader = FARMReader("deepset/roberta-base-squad2")
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
for document in tqdm(document_store):
for idx, document in enumerate(tqdm(document_store)):
print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
result = qag_pipeline.run(documents=[document])
pprint(result)
print_questions(result)
# This Haystack script was made with love by deepset in Berlin, Germany
# Haystack: https://github.com/deepset-ai/haystack

View File

@ -1567,14 +1567,14 @@
" query=\"Who is the father of Arya Stark?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_1)\n",
"print_answers(res_1, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_2 = sklearn_keyword_classifier.run(\n",
" query=\"arya stark father\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_2)\n"
"print_answers(res_2, details=\"minimum\")\n"
],
"outputs": [],
"metadata": {
@ -1591,14 +1591,14 @@
" query=\"which country was jon snow filmed ?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_3)\n",
"print_answers(res_3, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_4 = sklearn_keyword_classifier.run(\n",
" query=\"jon snow country\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_4)"
"print_answers(res_4, details=\"minimum\")"
],
"outputs": [],
"metadata": {
@ -1614,14 +1614,14 @@
" query=\"who are the younger brothers of arya stark ?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_5)\n",
"print_answers(res_5, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_6 = sklearn_keyword_classifier.run(\n",
" query=\"arya stark younger brothers\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_6)"
"print_answers(res_6, details=\"minimum\")"
],
"outputs": [],
"metadata": {
@ -1670,14 +1670,14 @@
" query=\"Who is the father of Arya Stark?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_1)\n",
"print_answers(res_1, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_2 = transformer_keyword_classifier.run(\n",
" query=\"arya stark father\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_2)\n"
"print_answers(res_2, details=\"minimum\")\n"
],
"outputs": [],
"metadata": {
@ -1694,14 +1694,14 @@
" query=\"which country was jon snow filmed ?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_3)\n",
"print_answers(res_3, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_4 = transformer_keyword_classifier.run(\n",
" query=\"jon snow country\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_4)"
"print_answers(res_4, details=\"minimum\")"
],
"outputs": [],
"metadata": {
@ -1717,14 +1717,14 @@
" query=\"who are the younger brothers of arya stark ?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_5)\n",
"print_answers(res_5, details=\"minimum\")\n",
"\n",
"# Run only the sparse retriever on a keyword based query\n",
"res_6 = transformer_keyword_classifier.run(\n",
" query=\"arya stark younger brothers\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_6)"
"print_answers(res_6, details=\"minimum\")"
],
"outputs": [],
"metadata": {
@ -1771,14 +1771,14 @@
" query=\"Who is the father of Arya Stark?\"\n",
")\n",
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
"print_answers(res_1)\n",
"print_answers(res_1, details=\"minimum\")\n",
"\n",
"# Show only DPR results\n",
"res_2 = transformer_question_classifier.run(\n",
" query=\"Arya Stark was the daughter of a Lord.\"\n",
")\n",
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
"res_2"
"print_answers(res_2, details=\"minimum\")"
],
"outputs": [],
"metadata": {

View File

@ -35,7 +35,9 @@ def tutorial14_query_classifier():
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
print()
print("Sklearn keyword classifier")
print("==========================")
# Here we build the pipeline
sklearn_keyword_classifier = Pipeline()
sklearn_keyword_classifier.add_node(component=SklearnQueryClassifier(), name="QueryClassifier", inputs=["Query"])
@ -48,44 +50,53 @@ def tutorial14_query_classifier():
res_1 = sklearn_keyword_classifier.run(
query="Who is the father of Arya Stark?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Run only the sparse retriever on a keyword based query
res_2 = sklearn_keyword_classifier.run(
query="arya stark father",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")
# Run only the dense retriever on the full sentence query
res_3 = sklearn_keyword_classifier.run(
query="which country was jon snow filmed ?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")
# Run only the sparse retriever on a keyword based query
res_4 = sklearn_keyword_classifier.run(
query="jon snow country",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
# Run only the dense retriever on the full sentence query
res_5 = sklearn_keyword_classifier.run(
query="who are the younger brothers of arya stark ?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")
# Run only the sparse retriever on a keyword based query
res_6 = sklearn_keyword_classifier.run(
query="arya stark younger brothers",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
print()
print("Transformer keyword classifier")
print("==============================")
# Here we build the pipeline
transformer_keyword_classifier = Pipeline()
transformer_keyword_classifier.add_node(component=TransformersQueryClassifier(), name="QueryClassifier", inputs=["Query"])
@ -98,43 +109,53 @@ def tutorial14_query_classifier():
res_1 = transformer_keyword_classifier.run(
query="Who is the father of Arya Stark?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Run only the sparse retriever on a keyword based query
res_2 = transformer_keyword_classifier.run(
query="arya stark father",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_2)
print_answers(res_2, details="minimum")
# Run only the dense retriever on the full sentence query
res_3 = transformer_keyword_classifier.run(
query="which country was jon snow filmed ?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_3)
print_answers(res_3, details="minimum")
# Run only the sparse retriever on a keyword based query
res_4 = transformer_keyword_classifier.run(
query="jon snow country",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_4)
print_answers(res_4, details="minimum")
# Run only the dense retriever on the full sentence query
res_5 = transformer_keyword_classifier.run(
query="who are the younger brothers of arya stark ?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_5)
print_answers(res_5, details="minimum")
# Run only the sparse retriever on a keyword based query
res_6 = transformer_keyword_classifier.run(
query="arya stark younger brothers",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
print_answers(res_6)
print_answers(res_6, details="minimum")
print()
print("Transformer question classifier")
print("===============================")
# Here we build the pipeline
transformer_question_classifier = Pipeline()
@ -147,15 +168,17 @@ def tutorial14_query_classifier():
res_1 = transformer_question_classifier.run(
query="Who is the father of Arya Stark?",
)
print("\n===============================")
print("DPR Results" + "\n" + "="*15)
print_answers(res_1)
print_answers(res_1, details="minimum")
# Show only DPR results
res_2 = transformer_question_classifier.run(
query="Arya Stark was the daughter of a Lord.",
)
print("\n===============================")
print("ES Results" + "\n" + "="*15)
res_2
print_answers(res_2, details="minimum")
# Here we create the keyword vs question/statement query classifier

View File

@ -368,7 +368,38 @@
"cell_type": "code",
"execution_count": null,
"source": [
"print_answers(prediction, details=\"minimal\")"
"# Now you can either print the object directly...\n",
"from pprint import pprint\n",
"\n",
"pprint(prediction)\n",
"\n",
"# Sample output: \n",
"# {\n",
"# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
"# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
"# ...\n",
"# ]\n",
"# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\\n===\\'\\'A Game of Thrones\\'\\'===\\nSansa Stark begins the novel by being betrothed to Crown ...'>,\n",
"# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\\n===Season 2===\\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,\n",
"# ...\n",
"# ],\n",
"# 'no_ans_gap': 11.688868522644043,\n",
"# 'node_id': 'Reader',\n",
"# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n",
"# 'query': 'Who is the father of Arya Stark?',\n",
"# 'root_node': 'Query'\n",
"# }\n"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# ...or use a util to simplify the output\n",
"# Change `minimum` to `medium` or `all` to raise the level of detail\n",
"print_answers(prediction, details=\"minimum\")"
],
"outputs": [],
"metadata": {

View File

@ -134,7 +134,37 @@ def tutorial1_basic_qa_pipeline():
# prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
# prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
print_answers(prediction, details="minimal")
# Now you can either print the object directly
print("\n\nRaw object:\n")
from pprint import pprint
pprint(prediction)
# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }
# Note that the documents contained in the above object are the documents filtered by the Retriever from
# the document store. Although the answers were extracted from these documents, it's possible that many
# answers were taken from a single one of them, and that some of the documents were not the source of any answer.
# Or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print("\n\nSimplified output:\n")
print_answers(prediction, details="minimum")
if __name__ == "__main__":

View File

@ -358,11 +358,42 @@
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"# Now you can either print the object directly...\n",
"from pprint import pprint\n",
"\n",
"pprint(prediction)\n",
"\n",
"# Sample output: \n",
"# {\n",
"# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
"# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
"# ...\n",
"# ]\n",
"# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\\n===\\'\\'A Game of Thrones\\'\\'===\\nSansa Stark begins the novel by being betrothed to Crown ...'>,\n",
"# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\\n===Season 2===\\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,\n",
"# ...\n",
"# ],\n",
"# 'no_ans_gap': 11.688868522644043,\n",
"# 'node_id': 'Reader',\n",
"# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n",
"# 'query': 'Who is the father of Arya Stark?',\n",
"# 'root_node': 'Query'\n",
"# }"
],
"outputs": [],
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 11,
"source": [
"print_answers(prediction, details=\"minimal\")"
"# ...or use a util to simplify the output\n",
"# Change `minimum` to `medium` or `all` to raise the level of detail\n",
"print_answers(prediction, details=\"minimum\")"
],
"outputs": [
{

View File

@ -101,7 +101,36 @@ def tutorial3_basic_qa_pipeline_without_elasticsearch():
# prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
# prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
print_answers(prediction, details="minimal")
# Now you can either print the object directly
print("\n\nRaw object:\n")
from pprint import pprint
pprint(prediction)
# Sample output:
# {
# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
# ...
# ],
# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
# ...
# ],
# 'no_ans_gap': 11.688868522644043,
# 'node_id': 'Reader',
# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
# 'query': 'Who is the father of Arya Stark?',
# 'root_node': 'Query'
# }
# Note that the documents contained in the above object are the documents filtered by the Retriever from
# the document store. Although the answers were extracted from these documents, it's possible that many
# answers were taken from a single one of them, and that some of the documents were not the source of any answer.
# Or use a util to simplify the output
# Change `minimum` to `medium` or `all` to raise the level of detail
print("\n\nSimplified output:\n")
print_answers(prediction, details="minimum")
if __name__ == "__main__":

View File

@ -265,12 +265,10 @@
"cell_type": "code",
"execution_count": null,
"source": [
"from haystack.utils import print_answers\n",
"\n",
"prediction = pipe.run(query=\"How is the virus spreading?\", params={\"Retriever\": {\"top_k\": 10}})\n",
"for a in prediction[\"answers\"]:\n",
" print(f\"Answer: {a.answer}\")\n",
" print(f\"Question: {a.meta['query']}\")\n",
" print(f\"Score: {a.score}\")\n",
" print(\"---------------------\")"
"print_answers(prediction, details=\"medium\")"
],
"outputs": [],
"metadata": {

View File

@ -1,7 +1,7 @@
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.utils import launch_es
from haystack.utils import launch_es, print_answers
import pandas as pd
import requests
import logging
@ -66,17 +66,13 @@ def tutorial4_faq_style_qa():
docs_to_index = df.to_dict(orient="records")
document_store.write_documents(docs_to_index)
# Initialize a Pipeline (this time without a reader) and ask questions
# Initialize a Pipeline (this time without a reader) and ask questions
from haystack.pipelines import FAQPipeline
pipe = FAQPipeline(retriever=retriever)
prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
for a in prediction["answers"]:
print(f"Answer: {a.answer}")
print(f"Question: {a.meta['query']}")
print(f"Score: {a.score}")
print("---------------------")
print_answers(prediction, details="medium")
if __name__ == "__main__":

View File

@ -67,7 +67,7 @@ def tutorial6_better_retrieval_via_dpr():
# prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
# prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
print_answers(prediction, details="minimal")
print_answers(prediction, details="minimum")
if __name__ == "__main__":

View File

@ -330,11 +330,12 @@
"source": [
"# Or alternatively use the Pipeline class\n",
"from haystack.pipelines import GenerativeQAPipeline\n",
"from haystack.utils import print_answers\n",
"\n",
"pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)\n",
"for question in QUESTIONS:\n",
" res = pipe.run(query=question, params={\"Generator\": {\"top_k\": 1}, \"Retriever\": {\"top_k\": 5}})\n",
" print(res)"
" print_answers(res, details=\"minimum\")"
],
"outputs": [],
"metadata": {

View File

@ -4,6 +4,7 @@ import pandas as pd
from haystack import Document
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import RAGenerator, DensePassageRetriever
from haystack.utils import print_answers
def tutorial7_rag_generator():
@ -35,7 +36,6 @@ def tutorial7_rag_generator():
)
)
# Initialize FAISS document store to documents and corresponding index for embeddings
# Set `return_embedding` to `True`, so generator doesn't have to perform re-embedding
document_store = FAISSDocumentStore(
@ -108,14 +108,14 @@ def tutorial7_rag_generator():
# Print your answer
answers = predicted_result["answers"]
print(f'Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
print(f' -> Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
# Or alternatively use the Pipeline class
from haystack.pipelines import GenerativeQAPipeline
pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
for question in QUESTIONS:
res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}})
print(res)
print_answers(res, details="minimum")
if __name__ == "__main__":
tutorial7_rag_generator()

View File

@ -80,7 +80,7 @@ def tutorial8_preprocessing():
split_respect_sentence_boundary=True
)
docs_default = preprocessor.process(doc_txt)
print(f"n_docs_input: 1\nn_docs_output: {len(docs_default)}")
print(f"\nn_docs_input: 1\nn_docs_output: {len(docs_default)}")
"""
## Cleaning
@ -101,13 +101,14 @@ def tutorial8_preprocessing():
preprocessor_nrsb = PreProcessor(split_respect_sentence_boundary=False)
docs_nrsb = preprocessor_nrsb.process(doc_txt)
print("RESPECTING SENTENCE BOUNDARY")
print("\nRESPECTING SENTENCE BOUNDARY:")
end_text = docs_default[0]["content"][-50:]
print("End of document: \"..." + end_text + "\"")
print()
print("NOT RESPECTING SENTENCE BOUNDARY")
print("\nNOT RESPECTING SENTENCE BOUNDARY:")
end_text_nrsb = docs_nrsb[0]["content"][-50:]
print("End of document: \"..." + end_text_nrsb + "\"")
print()
"""
A commonly used strategy to split long documents, especially in the field of Question Answering,