diff --git a/docs/_src/tutorials/tutorials/1.md b/docs/_src/tutorials/tutorials/1.md index c400adf21..05eb10a80 100644 --- a/docs/_src/tutorials/tutorials/1.md +++ b/docs/_src/tutorials/tutorials/1.md @@ -237,7 +237,35 @@ prediction = pipe.run( ```python -print_answers(prediction, details="minimal") +# Now you can either print the object directly... +from pprint import pprint + +pprint(prediction) + +# Sample output: +# { +# 'answers': [ , +# , +# ... +# ] +# 'documents': [ , +# , +# ... +# ], +# 'no_ans_gap': 11.688868522644043, +# 'node_id': 'Reader', +# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}}, +# 'query': 'Who is the father of Arya Stark?', +# 'root_node': 'Query' +# } + +``` + + +```python +# ...or use a util to simplify the output +# Change `minimum` to `medium` or `all` to raise the level of detail +print_answers(prediction, details="minimum") ``` ## About us diff --git a/docs/_src/tutorials/tutorials/11.md b/docs/_src/tutorials/tutorials/11.md index 5327e7e89..126841acc 100644 --- a/docs/_src/tutorials/tutorials/11.md +++ b/docs/_src/tutorials/tutorials/11.md @@ -296,7 +296,7 @@ Below, we define a very naive `QueryClassifier` and show how to use it: ```python -class QueryClassifier(BaseComponent): +class CustomQueryClassifier(BaseComponent): outgoing_edges = 2 def run(self, query: str): @@ -307,7 +307,7 @@ class QueryClassifier(BaseComponent): # Here we build the pipeline p_classifier = Pipeline() -p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"]) +p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"]) p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"]) p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"]) p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"]) diff --git a/docs/_src/tutorials/tutorials/13.md b/docs/_src/tutorials/tutorials/13.md index 4e3e50ba2..f2d43b52d 100644 --- a/docs/_src/tutorials/tutorials/13.md +++ b/docs/_src/tutorials/tutorials/13.md @@ -42,7 +42,8 @@ from tqdm import tqdm from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader from haystack.document_stores import ElasticsearchDocumentStore from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline -from haystack.utils import launch_es +from haystack.utils import launch_es, print_questions + ``` Let's start an Elasticsearch instance with one of the options below: @@ -98,9 +99,11 @@ which the the document can answer. ```python question_generation_pipeline = QuestionGenerationPipeline(question_generator) -for document in document_store: +for idx, document in enumerate(document_store): + + print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n") result = question_generation_pipeline.run(documents=[document]) - pprint(result) + print_questions(result) ``` ## Retriever Question Generation Pipeline @@ -111,8 +114,10 @@ This pipeline takes a query as input. It retrieves relevant documents and then g ```python retriever = ElasticsearchRetriever(document_store=document_store) rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator) + +print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n") result = rqg_pipeline.run(query="Arya Stark") -pprint(result) +print_questions(result) ``` ## Question Answer Generation Pipeline @@ -124,9 +129,11 @@ a Reader model ```python reader = FARMReader("deepset/roberta-base-squad2") qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader) -for document in tqdm(document_store): +for idx, document in enumerate(tqdm(document_store)): + + print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n") result = qag_pipeline.run(documents=[document]) - pprint(result) + print_questions(result) ``` ## About us diff --git a/docs/_src/tutorials/tutorials/14.md b/docs/_src/tutorials/tutorials/14.md index 982dca9d2..16c3f4a20 100644 --- a/docs/_src/tutorials/tutorials/14.md +++ b/docs/_src/tutorials/tutorials/14.md @@ -161,14 +161,14 @@ res_1 = sklearn_keyword_classifier.run( query="Who is the father of Arya Stark?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_1) +print_answers(res_1, details="minimum") # Run only the sparse retriever on a keyword based query res_2 = sklearn_keyword_classifier.run( query="arya stark father" ) print("ES Results" + "\n" + "="*15) -print_answers(res_2) +print_answers(res_2, details="minimum") ``` @@ -180,14 +180,14 @@ res_3 = sklearn_keyword_classifier.run( query="which country was jon snow filmed ?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_3) +print_answers(res_3, details="minimum") # Run only the sparse retriever on a keyword based query res_4 = sklearn_keyword_classifier.run( query="jon snow country" ) print("ES Results" + "\n" + "="*15) -print_answers(res_4) +print_answers(res_4, details="minimum") ``` @@ -197,14 +197,14 @@ res_5 = sklearn_keyword_classifier.run( query="who are the younger brothers of arya stark ?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_5) +print_answers(res_5, details="minimum") # Run only the sparse retriever on a keyword based query res_6 = sklearn_keyword_classifier.run( query="arya stark younger brothers" ) print("ES Results" + "\n" + "="*15) -print_answers(res_6) +print_answers(res_6, details="minimum") ``` ## Transformer Keyword vs Question/Statement Classifier @@ -234,14 +234,14 @@ res_1 = transformer_keyword_classifier.run( query="Who is the father of Arya Stark?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_1) +print_answers(res_1, details="minimum") # Run only the sparse retriever on a keyword based query res_2 = transformer_keyword_classifier.run( query="arya stark father" ) print("ES Results" + "\n" + "="*15) -print_answers(res_2) +print_answers(res_2, details="minimum") ``` @@ -253,14 +253,14 @@ res_3 = transformer_keyword_classifier.run( query="which country was jon snow filmed ?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_3) +print_answers(res_3, details="minimum") # Run only the sparse retriever on a keyword based query res_4 = transformer_keyword_classifier.run( query="jon snow country" ) print("ES Results" + "\n" + "="*15) -print_answers(res_4) +print_answers(res_4, details="minimum") ``` @@ -270,14 +270,14 @@ res_5 = transformer_keyword_classifier.run( query="who are the younger brothers of arya stark ?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_5) +print_answers(res_5, details="minimum") # Run only the sparse retriever on a keyword based query res_6 = transformer_keyword_classifier.run( query="arya stark younger brothers" ) print("ES Results" + "\n" + "="*15) -print_answers(res_6) +print_answers(res_6, details="minimum") ``` ## Question vs Statement Classifier @@ -305,14 +305,14 @@ res_1 = transformer_question_classifier.run( query="Who is the father of Arya Stark?" ) print("DPR Results" + "\n" + "="*15) -print_answers(res_1) +print_answers(res_1, details="minimum") # Show only DPR results res_2 = transformer_question_classifier.run( query="Arya Stark was the daughter of a Lord." ) print("ES Results" + "\n" + "="*15) -res_2 +print_answers(res_2, details="minimum") ``` ## Standalone Query Classifier diff --git a/docs/_src/tutorials/tutorials/3.md b/docs/_src/tutorials/tutorials/3.md index 8b7a7e9d1..38256fe1f 100644 --- a/docs/_src/tutorials/tutorials/3.md +++ b/docs/_src/tutorials/tutorials/3.md @@ -182,7 +182,34 @@ prediction = pipe.run( ```python -print_answers(prediction, details="minimal") +# Now you can either print the object directly... +from pprint import pprint + +pprint(prediction) + +# Sample output: +# { +# 'answers': [ , +# , +# ... +# ] +# 'documents': [ , +# , +# ... +# ], +# 'no_ans_gap': 11.688868522644043, +# 'node_id': 'Reader', +# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}}, +# 'query': 'Who is the father of Arya Stark?', +# 'root_node': 'Query' +# } +``` + + +```python +# ...or use a util to simplify the output +# Change `minimum` to `medium` or `all` to raise the level of detail +print_answers(prediction, details="minimum") ``` ## About us diff --git a/docs/_src/tutorials/tutorials/4.md b/docs/_src/tutorials/tutorials/4.md index 16158f0fa..66dc55cf9 100644 --- a/docs/_src/tutorials/tutorials/4.md +++ b/docs/_src/tutorials/tutorials/4.md @@ -155,12 +155,10 @@ pipe = FAQPipeline(retriever=retriever) ```python +from haystack.utils import print_answers + prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}}) -for a in prediction["answers"]: - print(f"Answer: {a.answer}") - print(f"Question: {a.meta['query']}") - print(f"Score: {a.score}") - print("---------------------") +print_answers(prediction, details="medium") ``` ## About us diff --git a/docs/_src/tutorials/tutorials/7.md b/docs/_src/tutorials/tutorials/7.md index a92015758..e1e9ad12c 100644 --- a/docs/_src/tutorials/tutorials/7.md +++ b/docs/_src/tutorials/tutorials/7.md @@ -193,11 +193,12 @@ for question in QUESTIONS: ```python # Or alternatively use the Pipeline class from haystack.pipelines import GenerativeQAPipeline +from haystack.utils import print_answers pipe = GenerativeQAPipeline(generator=generator, retriever=retriever) for question in QUESTIONS: res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}}) - print(res) + print_answers(res, details="minimum") ``` ## About us diff --git a/haystack/schema.py b/haystack/schema.py index 065955a49..8467959b5 100644 --- a/haystack/schema.py +++ b/haystack/schema.py @@ -186,10 +186,13 @@ class Document: getattr(other, 'id_hash_keys', None) == self.id_hash_keys) def __repr__(self): - return str(self.to_dict()) + return f"" def __str__(self): - return f"content: {self.content[:100]} {'[...]' if len(self.content) > 100 else ''}" + # In some cases, self.content is None (therefore not subscriptable) + if not self.content: + return f"" + return f" 100 else ''}'>" def __lt__(self, other): """ Enable sorting of Documents by score """ @@ -262,7 +265,13 @@ class Answer: return self.score < other.score def __str__(self): - return f"answer: {self.answer} \nscore: {self.score} \ncontext: {self.context}" + # self.context might be None (therefore not subscriptable) + if not self.context: + return f"" + return f" 50 else ''}'>" + + def __repr__(self): + return f"" def to_dict(self): return asdict(self) diff --git a/haystack/utils/__init__.py b/haystack/utils/__init__.py index 56a8de371..d1731187b 100644 --- a/haystack/utils/__init__.py +++ b/haystack/utils/__init__.py @@ -16,6 +16,7 @@ from haystack.utils.doc_store import ( from haystack.utils.export_utils import ( print_answers, print_documents, + print_questions, export_answers_to_csv, convert_labels_to_squad, ) diff --git a/haystack/utils/export_utils.py b/haystack/utils/export_utils.py index 0788a4ee2..bf056a744 100644 --- a/haystack/utils/export_utils.py +++ b/haystack/utils/export_utils.py @@ -1,12 +1,8 @@ from typing import Dict, Any, List, Optional -import io -import re -import time import json import pprint import logging -import subprocess import pandas as pd from collections import defaultdict @@ -16,58 +12,98 @@ from haystack.document_stores.sql import DocumentORM logger = logging.getLogger(__name__) - -def print_answers(results: dict, details: str = "all"): +def print_answers(results: dict, details: str = "all", max_text_len: Optional[int] = None): """ - Utilitiy function to print results of Haystack pipelines + Utility function to print results of Haystack pipelines :param results: Results from a pipeline - :param details: One of ["minimum", "medium", "all]. Defining the level of details to print. + :param details: One of "minimum", "medium", "all". Defining the level of details to print. + :param max_text_lenght: shorten lengthy text fields to the maximum allowed length. Set to + None to not cut long text. :return: None """ - # TODO: unify the output format of Generator and Reader so that this function doesn't have the try/except - # Or implement a class method like PredReader.print() and PredGenerator.print() that handles all this functionality. - # This default case is when the answers come from a Reader - try: - answers = results["answers"] - pp = pprint.PrettyPrinter(indent=4) - if details in ("minimal", "medium"): - if details == "minimal": - keys_to_keep = set(["answer", "context"]) - elif details == "medium": - keys_to_keep = set(["answer", "context", "score"]) + # Defines the fields to keep in the Answer for each detail level + fields_to_keep_by_level = { + "minimum": ["answer", "context"], + "medium": ["answer", "context", "score"] + } - # filter the results - filtered_answers = [] - for ans in answers: - filtered_answers.append({k: getattr(ans, k) for k in keys_to_keep}) - pp.pprint(filtered_answers) - else: - pp.pprint(results) - # This fall back case is when the answers come from a Generator - except: - if details == "minimal": - print(f"Query: {results['query']}") - for a in results["answers"]: - print(f"Answer: {a['answer']}") - else: - pp.pprint(results) + if not "answers" in results.keys(): + raise ValueError("The results object does not seem to come from a Reader: " + f"it does not contain the 'answers' key, but only: {results.keys()}. " + "Try print_documents or print_questions.") + if "query" in results.keys(): + print(f"\nQuery: {results['query']}\nAnswers:") -def print_documents(results: dict, max_text_len: Optional[int] = None, print_meta: bool = False): - print(f"Query: {results['query']}") + answers = results["answers"] pp = pprint.PrettyPrinter(indent=4) - for d in results["documents"]: - print() - new_text = d.content[:max_text_len] - if len(new_text) != len(d.content): - new_text += "..." - results = { - "name": d.meta.get("name", None), - "content": new_text - } + + # Filter the results by detail level + filtered_answers = [] + if details in fields_to_keep_by_level.keys(): + for ans in answers: + filtered_answers.append({k: getattr(ans, k) for k in fields_to_keep_by_level[details]}) + elif details == "all": + filtered_answers = answers + else: + logging.warn(f"print_answers received details='{details}', which was not understood. " + "Valid values are 'minimum', 'medium', and 'all'. Using 'all'.") + filtered_answers = answers + + # Shorten long text fields + if max_text_len is not None: + for ans in answers: + if "context" in ans.keys() and len(ans["context"]) > 50: + ans["context"] = ans["context"][:50] + "..." + + pp.pprint(filtered_answers) + + +def print_documents(results: dict, max_text_len: Optional[int] = None, print_name: bool = True, print_meta: bool = False): + """ + Utility that prints a compressed representation of the documents returned by a pipeline. + :param max_text_lenght: shorten the document's content to a maximum number of chars. if None, does not cut. + :param print_name: whether to print the document's name (from the metadata) or not. + :param print_meta: whether to print the document's metadata or not. + """ + print(f"\nQuery: {results['query']}\n") + pp = pprint.PrettyPrinter(indent=4) + + for doc in results["documents"]: + content = doc.content + if max_text_len: + content = doc.content[:max_text_len] + ("..." if len(doc.content) > max_text_len else "") + results = {"content": content} + if print_name: + results["name"] = doc.meta.get("name", None) if print_meta: - results["meta"] = d.meta + results["meta"] = doc.meta pp.pprint(results) + print() + + +def print_questions(results: dict): + """ + Utility to print the output of a question generating pipeline in a readable format. + """ + if "generated_questions" in results.keys(): + print("\nGenerated questions:") + for result in results["generated_questions"]: + for question in result["questions"]: + print(f" - {question}") + + elif "results" in results.keys(): + print("\nGenerated pairs:") + for pair in results["results"]: + print(f" - Q:{pair['query']}") + for answer in pair["answers"]: + print(f" A: {answer.answer}") + + else: + raise ValueError("This object does not seem to be the output " + "of a question generating pipeline: does not contain neither " + f"'generated_questions' nor 'results', but only: {results.keys()}. " + " Try `print_answers` or `print_documents`.") def export_answers_to_csv(agg_results: list, output_file): diff --git a/tutorials/Tutorial11_Pipelines.ipynb b/tutorials/Tutorial11_Pipelines.ipynb index dac3ac5f9..7c2aa94f2 100644 --- a/tutorials/Tutorial11_Pipelines.ipynb +++ b/tutorials/Tutorial11_Pipelines.ipynb @@ -547,7 +547,7 @@ "cell_type": "code", "execution_count": null, "source": [ - "class QueryClassifier(BaseComponent):\n", + "class CustomQueryClassifier(BaseComponent):\n", " outgoing_edges = 2\n", "\n", " def run(self, query: str):\n", @@ -558,7 +558,7 @@ "\n", "# Here we build the pipeline\n", "p_classifier = Pipeline()\n", - "p_classifier.add_node(component=QueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n", + "p_classifier.add_node(component=CustomQueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n", "p_classifier.add_node(component=es_retriever, name=\"ESRetriever\", inputs=[\"QueryClassifier.output_1\"])\n", "p_classifier.add_node(component=dpr_retriever, name=\"DPRRetriever\", inputs=[\"QueryClassifier.output_2\"])\n", "p_classifier.add_node(component=reader, name=\"QAReader\", inputs=[\"ESRetriever\", \"DPRRetriever\"])\n", diff --git a/tutorials/Tutorial11_Pipelines.py b/tutorials/Tutorial11_Pipelines.py index c9d71d568..30d5de17d 100644 --- a/tutorials/Tutorial11_Pipelines.py +++ b/tutorials/Tutorial11_Pipelines.py @@ -2,7 +2,7 @@ from haystack.utils import clean_wiki_text, print_answers, print_documents, fetc from pprint import pprint from haystack import Pipeline from haystack.document_stores import ElasticsearchDocumentStore -from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, JoinDocuments +from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, BaseComponent, JoinDocuments from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline, GenerativeQAPipeline @@ -35,33 +35,44 @@ def tutorial11_pipelines(): reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2") - ###################### - # Prebuilt Pipelines # - ###################### + print() + print("######################") + print("# Prebuilt Pipelines #") + print("######################") - # Extractive QA Pipeline - ######################## + print() + print("# Extractive QA Pipeline") + print("########################") + query="Who is the father of Arya Stark?" p_extractive_premade = ExtractiveQAPipeline(reader=reader, retriever=es_retriever) res = p_extractive_premade.run( - query="Who is the father of Arya Stark?", + query=query, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}, ) - print_answers(res, details="minimal") + print("\nQuery: ", query) + print("Answers:") + print_answers(res, details="minimum") - # Document Search Pipeline - ########################## + print() + print("# Document Search Pipeline") + print("##########################") + + query="Who is the father of Arya Stark?" p_retrieval = DocumentSearchPipeline(es_retriever) res = p_retrieval.run( - query="Who is the father of Arya Stark?", + query=query, params={"Retriever": {"top_k": 10}}, ) + print() print_documents(res, max_text_len=200) - # Generator Pipeline - ########################## + + print() + print("# Generator Pipeline") + print("####################") # We set this to True so that the document store returns document embeddings # with each document, this is needed by the Generator @@ -73,11 +84,12 @@ def tutorial11_pipelines(): # Generative QA p_generator = GenerativeQAPipeline(generator=rag_generator, retriever=dpr_retriever) res = p_generator.run( - query="Who is the father of Arya Stark?", + query=query, params={"Retriever": {"top_k": 10}}, ) - print_answers(res, details="minimal") + print() + print_answers(res, details="minimum") # We are setting this to False so that in later pipelines, # we get a cleaner printout @@ -91,12 +103,14 @@ def tutorial11_pipelines(): p_retrieval.draw("pipeline_retrieval.png") p_generator.draw("pipeline_generator.png") - #################### - # Custom Pipelines # - #################### + print() + print("####################") + print("# Custom Pipelines #") + print("####################") - # Extractive QA Pipeline - ######################## + print() + print("# Extractive QA Pipeline") + print("########################") # Custom built extractive QA pipeline p_extractive = Pipeline() @@ -104,16 +118,21 @@ def tutorial11_pipelines(): p_extractive.add_node(component=reader, name="Reader", inputs=["Retriever"]) # Now we can run it + query="Who is the father of Arya Stark?" res = p_extractive.run( - query="Who is the father of Arya Stark?", + query=query, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}, ) - print_answers(res, details="minimal") + print("\nQuery: ", query) + print("Answers:") + print_answers(res, details="minimum") p_extractive.draw("pipeline_extractive.png") - # Ensembled Retriever Pipeline - ############################## + print() + print("# Ensembled Retriever Pipeline") + print("##############################") + # Create ensembled pipeline p_ensemble = Pipeline() p_ensemble.add_node(component=es_retriever, name="ESRetriever", inputs=["Query"]) @@ -123,22 +142,27 @@ def tutorial11_pipelines(): p_ensemble.draw("pipeline_ensemble.png") # Run pipeline + query="Who is the father of Arya Stark?" res = p_ensemble.run( query="Who is the father of Arya Stark?", params={"ESRetriever": {"top_k": 5}, "DPRRetriever": {"top_k": 5}}, ) - print_answers(res, details="minimal") + print("\nQuery: ", query) + print("Answers:") + print_answers(res, details="minimum") - # Query Classification Pipeline - ############################### + + print() + print("# Query Classification Pipeline") + print("###############################") # Decision Nodes help you route your data so that only certain branches of your `Pipeline` are run. # Though this looks very similar to the ensembled pipeline shown above, # the key difference is that only one of the retrievers is run for each request. # By contrast both retrievers are always run in the ensembled approach. - class QueryClassifier(): + class CustomQueryClassifier(BaseComponent): outgoing_edges = 2 def run(self, query): @@ -149,25 +173,32 @@ def tutorial11_pipelines(): # Here we build the pipeline p_classifier = Pipeline() - p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"]) + p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"]) p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"]) p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"]) p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"]) p_classifier.draw("pipeline_classifier.png") # Run only the dense retriever on the full sentence query + query="Who is the father of Arya Stark?" res_1 = p_classifier.run( - query="Who is the father of Arya Stark?", + query=query, ) - print("DPR Results" + "\n" + "="*15) - print_answers(res_1) + print() + print("\nQuery: ", query) + print(" * DPR Answers:") + print_answers(res_1, details="minimum") + # Run only the sparse retriever on a keyword based query + query="Arya Stark father" res_2 = p_classifier.run( - query="Arya Stark father", + query=query, ) - print("ES Results" + "\n" + "="*15) - print_answers(res_2) + print() + print("\nQuery: ", query) + print(" * ES Answers:") + print_answers(res_2, details="minimum") if __name__ == "__main__": diff --git a/tutorials/Tutorial12_LFQA.py b/tutorials/Tutorial12_LFQA.py index 58b439524..c2d80ba9e 100644 --- a/tutorials/Tutorial12_LFQA.py +++ b/tutorials/Tutorial12_LFQA.py @@ -91,7 +91,6 @@ def tutorial12_lfqa(): print(f"Query: {query_2}") print(f"Answer: {result_2['answers'][0]}") print() - pipe.run(query=query_2, params={"Retriever": {"top_k": 1}}) if __name__ == "__main__": diff --git a/tutorials/Tutorial13_Question_generation.ipynb b/tutorials/Tutorial13_Question_generation.ipynb index 4b4bd81e8..963a9c9ae 100644 --- a/tutorials/Tutorial13_Question_generation.ipynb +++ b/tutorials/Tutorial13_Question_generation.ipynb @@ -66,7 +66,7 @@ "from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader\n", "from haystack.document_stores import ElasticsearchDocumentStore\n", "from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline\n", - "from haystack.utils import launch_es" + "from haystack.utils import launch_es, print_questions\n" ], "outputs": [], "metadata": { @@ -188,9 +188,11 @@ "execution_count": null, "source": [ "question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n", - "for document in document_store:\n", + "for idx, document in enumerate(document_store):\n", + " \n", + " print(f\"\\n * Generating questions for document {idx}: {document.content[:100]}...\\n\")\n", " result = question_generation_pipeline.run(documents=[document])\n", - " pprint(result)" + " print_questions(result)" ], "outputs": [], "metadata": { @@ -220,8 +222,10 @@ "source": [ "retriever = ElasticsearchRetriever(document_store=document_store)\n", "rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)\n", + "\n", + "print(f\"\\n * Generating questions for documents matching the query 'Arya Stark'\\n\")\n", "result = rqg_pipeline.run(query=\"Arya Stark\")\n", - "pprint(result)" + "print_questions(result)" ], "outputs": [], "metadata": { @@ -252,9 +256,11 @@ "source": [ "reader = FARMReader(\"deepset/roberta-base-squad2\")\n", "qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)\n", - "for document in tqdm(document_store):\n", + "for idx, document in enumerate(tqdm(document_store)):\n", + "\n", + " print(f\"\\n * Generating questions and answers for document {idx}: {document.content[:100]}...\\n\")\n", " result = qag_pipeline.run(documents=[document])\n", - " pprint(result)" + " print_questions(result)" ], "outputs": [], "metadata": { diff --git a/tutorials/Tutorial13_Question_generation.py b/tutorials/Tutorial13_Question_generation.py index 4a9cdebd2..831f8b7b5 100644 --- a/tutorials/Tutorial13_Question_generation.py +++ b/tutorials/Tutorial13_Question_generation.py @@ -3,7 +3,7 @@ from pprint import pprint from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader from haystack.document_stores import ElasticsearchDocumentStore from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline -from haystack.utils import launch_es +from haystack.utils import launch_es, print_questions """ This is a bare bones tutorial showing what is possible with the QuestionGenerator Node which automatically generates @@ -34,20 +34,31 @@ which the the document can answer. """ # QuestionGenerationPipeline +print("\nQuestionGenerationPipeline") +print("==========================") + question_generation_pipeline = QuestionGenerationPipeline(question_generator) -for document in document_store: - result = question_generation_pipeline.run(documents=[document]) - pprint(result) +for idx, document in enumerate(document_store): + + print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n") + result = question_generation_pipeline.run(documents=[document]) + print_questions(result) """ This pipeline takes a query as input. It retrievers relevant documents and then generates questions based on these. """ # RetrieverQuestionGenerationPipeline +print("\RetrieverQuestionGenerationPipeline") +print("==================================") + retriever = ElasticsearchRetriever(document_store=document_store) rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator) + +print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n") result = rqg_pipeline.run(query="Arya Stark") -pprint(result) +print_questions(result) + """ This pipeline takes a document as input, generates questions on it, and attempts to answer these questions using @@ -55,11 +66,17 @@ a Reader model """ # QuestionAnswerGenerationPipeline +print("\QuestionAnswerGenerationPipeline") +print("===============================") + reader = FARMReader("deepset/roberta-base-squad2") qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader) -for document in tqdm(document_store): +for idx, document in enumerate(tqdm(document_store)): + + print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n") result = qag_pipeline.run(documents=[document]) - pprint(result) + print_questions(result) + # This Haystack script was made with love by deepset in Berlin, Germany # Haystack: https://github.com/deepset-ai/haystack diff --git a/tutorials/Tutorial14_Query_Classifier.ipynb b/tutorials/Tutorial14_Query_Classifier.ipynb index 209b071e0..50689a46c 100644 --- a/tutorials/Tutorial14_Query_Classifier.ipynb +++ b/tutorials/Tutorial14_Query_Classifier.ipynb @@ -1567,14 +1567,14 @@ " query=\"Who is the father of Arya Stark?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_1)\n", + "print_answers(res_1, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_2 = sklearn_keyword_classifier.run(\n", " query=\"arya stark father\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_2)\n" + "print_answers(res_2, details=\"minimum\")\n" ], "outputs": [], "metadata": { @@ -1591,14 +1591,14 @@ " query=\"which country was jon snow filmed ?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_3)\n", + "print_answers(res_3, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_4 = sklearn_keyword_classifier.run(\n", " query=\"jon snow country\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_4)" + "print_answers(res_4, details=\"minimum\")" ], "outputs": [], "metadata": { @@ -1614,14 +1614,14 @@ " query=\"who are the younger brothers of arya stark ?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_5)\n", + "print_answers(res_5, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_6 = sklearn_keyword_classifier.run(\n", " query=\"arya stark younger brothers\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_6)" + "print_answers(res_6, details=\"minimum\")" ], "outputs": [], "metadata": { @@ -1670,14 +1670,14 @@ " query=\"Who is the father of Arya Stark?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_1)\n", + "print_answers(res_1, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_2 = transformer_keyword_classifier.run(\n", " query=\"arya stark father\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_2)\n" + "print_answers(res_2, details=\"minimum\")\n" ], "outputs": [], "metadata": { @@ -1694,14 +1694,14 @@ " query=\"which country was jon snow filmed ?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_3)\n", + "print_answers(res_3, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_4 = transformer_keyword_classifier.run(\n", " query=\"jon snow country\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_4)" + "print_answers(res_4, details=\"minimum\")" ], "outputs": [], "metadata": { @@ -1717,14 +1717,14 @@ " query=\"who are the younger brothers of arya stark ?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_5)\n", + "print_answers(res_5, details=\"minimum\")\n", "\n", "# Run only the sparse retriever on a keyword based query\n", "res_6 = transformer_keyword_classifier.run(\n", " query=\"arya stark younger brothers\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_6)" + "print_answers(res_6, details=\"minimum\")" ], "outputs": [], "metadata": { @@ -1771,14 +1771,14 @@ " query=\"Who is the father of Arya Stark?\"\n", ")\n", "print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n", - "print_answers(res_1)\n", + "print_answers(res_1, details=\"minimum\")\n", "\n", "# Show only DPR results\n", "res_2 = transformer_question_classifier.run(\n", " query=\"Arya Stark was the daughter of a Lord.\"\n", ")\n", "print(\"ES Results\" + \"\\n\" + \"=\"*15)\n", - "res_2" + "print_answers(res_2, details=\"minimum\")" ], "outputs": [], "metadata": { diff --git a/tutorials/Tutorial14_Query_Classifier.py b/tutorials/Tutorial14_Query_Classifier.py index 347983e16..7ecd859a4 100644 --- a/tutorials/Tutorial14_Query_Classifier.py +++ b/tutorials/Tutorial14_Query_Classifier.py @@ -35,7 +35,9 @@ def tutorial14_query_classifier(): reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2") - + print() + print("Sklearn keyword classifier") + print("==========================") # Here we build the pipeline sklearn_keyword_classifier = Pipeline() sklearn_keyword_classifier.add_node(component=SklearnQueryClassifier(), name="QueryClassifier", inputs=["Query"]) @@ -48,44 +50,53 @@ def tutorial14_query_classifier(): res_1 = sklearn_keyword_classifier.run( query="Who is the father of Arya Stark?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_1) + print_answers(res_1, details="minimum") # Run only the sparse retriever on a keyword based query res_2 = sklearn_keyword_classifier.run( query="arya stark father", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_2) + print_answers(res_2, details="minimum") # Run only the dense retriever on the full sentence query res_3 = sklearn_keyword_classifier.run( query="which country was jon snow filmed ?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_3) + print_answers(res_3, details="minimum") # Run only the sparse retriever on a keyword based query res_4 = sklearn_keyword_classifier.run( query="jon snow country", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_4) + print_answers(res_4, details="minimum") # Run only the dense retriever on the full sentence query res_5 = sklearn_keyword_classifier.run( query="who are the younger brothers of arya stark ?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_5) + print_answers(res_5, details="minimum") # Run only the sparse retriever on a keyword based query res_6 = sklearn_keyword_classifier.run( query="arya stark younger brothers", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_6) + print_answers(res_6, details="minimum") + print() + print("Transformer keyword classifier") + print("==============================") # Here we build the pipeline transformer_keyword_classifier = Pipeline() transformer_keyword_classifier.add_node(component=TransformersQueryClassifier(), name="QueryClassifier", inputs=["Query"]) @@ -98,43 +109,53 @@ def tutorial14_query_classifier(): res_1 = transformer_keyword_classifier.run( query="Who is the father of Arya Stark?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_1) + print_answers(res_1, details="minimum") # Run only the sparse retriever on a keyword based query res_2 = transformer_keyword_classifier.run( query="arya stark father", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_2) + print_answers(res_2, details="minimum") # Run only the dense retriever on the full sentence query res_3 = transformer_keyword_classifier.run( query="which country was jon snow filmed ?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_3) + print_answers(res_3, details="minimum") # Run only the sparse retriever on a keyword based query res_4 = transformer_keyword_classifier.run( query="jon snow country", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_4) + print_answers(res_4, details="minimum") # Run only the dense retriever on the full sentence query res_5 = transformer_keyword_classifier.run( query="who are the younger brothers of arya stark ?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_5) + print_answers(res_5, details="minimum") # Run only the sparse retriever on a keyword based query res_6 = transformer_keyword_classifier.run( query="arya stark younger brothers", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - print_answers(res_6) + print_answers(res_6, details="minimum") + + print() + print("Transformer question classifier") + print("===============================") # Here we build the pipeline transformer_question_classifier = Pipeline() @@ -147,15 +168,17 @@ def tutorial14_query_classifier(): res_1 = transformer_question_classifier.run( query="Who is the father of Arya Stark?", ) + print("\n===============================") print("DPR Results" + "\n" + "="*15) - print_answers(res_1) + print_answers(res_1, details="minimum") # Show only DPR results res_2 = transformer_question_classifier.run( query="Arya Stark was the daughter of a Lord.", ) + print("\n===============================") print("ES Results" + "\n" + "="*15) - res_2 + print_answers(res_2, details="minimum") # Here we create the keyword vs question/statement query classifier diff --git a/tutorials/Tutorial1_Basic_QA_Pipeline.ipynb b/tutorials/Tutorial1_Basic_QA_Pipeline.ipynb index 0da77e0bc..fcd539794 100644 --- a/tutorials/Tutorial1_Basic_QA_Pipeline.ipynb +++ b/tutorials/Tutorial1_Basic_QA_Pipeline.ipynb @@ -368,7 +368,38 @@ "cell_type": "code", "execution_count": null, "source": [ - "print_answers(prediction, details=\"minimal\")" + "# Now you can either print the object directly...\n", + "from pprint import pprint\n", + "\n", + "pprint(prediction)\n", + "\n", + "# Sample output: \n", + "# {\n", + "# 'answers': [ ,\n", + "# ,\n", + "# ...\n", + "# ]\n", + "# 'documents': [ ,\n", + "# ,\n", + "# ...\n", + "# ],\n", + "# 'no_ans_gap': 11.688868522644043,\n", + "# 'node_id': 'Reader',\n", + "# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n", + "# 'query': 'Who is the father of Arya Stark?',\n", + "# 'root_node': 'Query'\n", + "# }\n" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# ...or use a util to simplify the output\n", + "# Change `minimum` to `medium` or `all` to raise the level of detail\n", + "print_answers(prediction, details=\"minimum\")" ], "outputs": [], "metadata": { diff --git a/tutorials/Tutorial1_Basic_QA_Pipeline.py b/tutorials/Tutorial1_Basic_QA_Pipeline.py index 9f2e00931..f2227fda3 100755 --- a/tutorials/Tutorial1_Basic_QA_Pipeline.py +++ b/tutorials/Tutorial1_Basic_QA_Pipeline.py @@ -134,7 +134,37 @@ def tutorial1_basic_qa_pipeline(): # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}}) # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}}) - print_answers(prediction, details="minimal") + # Now you can either print the object directly + print("\n\nRaw object:\n") + from pprint import pprint + pprint(prediction) + + # Sample output: + # { + # 'answers': [ , + # , + # ... + # ] + # 'documents': [ , + # , + # ... + # ], + # 'no_ans_gap': 11.688868522644043, + # 'node_id': 'Reader', + # 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}}, + # 'query': 'Who is the father of Arya Stark?', + # 'root_node': 'Query' + # } + + # Note that the documents contained in the above object are the documents filtered by the Retriever from + # the document store. Although the answers were extracted from these documents, it's possible that many + # answers were taken from a single one of them, and that some of the documents were not source of any answer. + + # Or use a util to simplify the output + # Change `minimum` to `medium` or `all` to raise the level of detail + print("\n\nSimplified output:\n") + print_answers(prediction, details="minimum") + if __name__ == "__main__": diff --git a/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.ipynb b/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.ipynb index 7818e565d..c4b4788c5 100644 --- a/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.ipynb +++ b/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.ipynb @@ -358,11 +358,42 @@ "outputs": [], "metadata": {} }, + { + "cell_type": "code", + "execution_count": null, + "source": [ + "# Now you can either print the object directly...\n", + "from pprint import pprint\n", + "\n", + "pprint(prediction)\n", + "\n", + "# Sample output: \n", + "# {\n", + "# 'answers': [ ,\n", + "# ,\n", + "# ...\n", + "# ]\n", + "# 'documents': [ ,\n", + "# ,\n", + "# ...\n", + "# ],\n", + "# 'no_ans_gap': 11.688868522644043,\n", + "# 'node_id': 'Reader',\n", + "# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n", + "# 'query': 'Who is the father of Arya Stark?',\n", + "# 'root_node': 'Query'\n", + "# }" + ], + "outputs": [], + "metadata": {} + }, { "cell_type": "code", "execution_count": 11, "source": [ - "print_answers(prediction, details=\"minimal\")" + "# ...or use a util to simplify the output\n", + "# Change `minimum` to `medium` or `all` to raise the level of detail\n", + "print_answers(prediction, details=\"minimum\")" ], "outputs": [ { diff --git a/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.py b/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.py index 4b47d26e9..7102a849e 100644 --- a/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.py +++ b/tutorials/Tutorial3_Basic_QA_Pipeline_without_Elasticsearch.py @@ -101,7 +101,36 @@ def tutorial3_basic_qa_pipeline_without_elasticsearch(): # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}}) # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}}) - print_answers(prediction, details="minimal") + # Now you can either print the object directly + print("\n\nRaw object:\n") + from pprint import pprint + pprint(prediction) + + # Sample output: + # { + # 'answers': [ , + # , + # ... + # ] + # 'documents': [ , + # , + # ... + # ], + # 'no_ans_gap': 11.688868522644043, + # 'node_id': 'Reader', + # 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}}, + # 'query': 'Who is the father of Arya Stark?', + # 'root_node': 'Query' + # } + + # Note that the documents contained in the above object are the documents filtered by the Retriever from + # the document store. Although the answers were extracted from these documents, it's possible that many + # answers were taken from a single one of them, and that some of the documents were not source of any answer. + + # Or use a util to simplify the output + # Change `minimum` to `medium` or `all` to raise the level of detail + print("\n\nSimplified output:\n") + print_answers(prediction, details="minimum") if __name__ == "__main__": diff --git a/tutorials/Tutorial4_FAQ_style_QA.ipynb b/tutorials/Tutorial4_FAQ_style_QA.ipynb index 732e22f87..23ab7c167 100644 --- a/tutorials/Tutorial4_FAQ_style_QA.ipynb +++ b/tutorials/Tutorial4_FAQ_style_QA.ipynb @@ -265,12 +265,10 @@ "cell_type": "code", "execution_count": null, "source": [ + "from haystack.utils import print_answers\n", + "\n", "prediction = pipe.run(query=\"How is the virus spreading?\", params={\"Retriever\": {\"top_k\": 10}})\n", - "for a in prediction[\"answers\"]:\n", - " print(f\"Answer: {a.answer}\")\n", - " print(f\"Question: {a.meta['query']}\")\n", - " print(f\"Score: {a.score}\")\n", - " print(\"---------------------\")" + "print_answers(prediction, details=\"medium\")" ], "outputs": [], "metadata": { diff --git a/tutorials/Tutorial4_FAQ_style_QA.py b/tutorials/Tutorial4_FAQ_style_QA.py index d6bffeb02..dc3a41ca6 100755 --- a/tutorials/Tutorial4_FAQ_style_QA.py +++ b/tutorials/Tutorial4_FAQ_style_QA.py @@ -1,7 +1,7 @@ from haystack.document_stores import ElasticsearchDocumentStore from haystack.nodes import EmbeddingRetriever -from haystack.utils import launch_es +from haystack.utils import launch_es, print_answers import pandas as pd import requests import logging @@ -66,17 +66,13 @@ def tutorial4_faq_style_qa(): docs_to_index = df.to_dict(orient="records") document_store.write_documents(docs_to_index) - # Initialize a Pipeline (this time without a reader) and ask questions + # Initialize a Pipeline (this time without a reader) and ask questions from haystack.pipelines import FAQPipeline pipe = FAQPipeline(retriever=retriever) prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}}) - for a in prediction["answers"]: - print(f"Answer: {a.answer}") - print(f"Question: {a.meta['query']}") - print(f"Score: {a.score}") - print("---------------------") + print_answers(prediction, details="medium") if __name__ == "__main__": diff --git a/tutorials/Tutorial6_Better_Retrieval_via_DPR.py b/tutorials/Tutorial6_Better_Retrieval_via_DPR.py index ab3e26e5b..02d2a01de 100755 --- a/tutorials/Tutorial6_Better_Retrieval_via_DPR.py +++ b/tutorials/Tutorial6_Better_Retrieval_via_DPR.py @@ -67,7 +67,7 @@ def tutorial6_better_retrieval_via_dpr(): # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}}) # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}}) - print_answers(prediction, details="minimal") + print_answers(prediction, details="minimum") if __name__ == "__main__": diff --git a/tutorials/Tutorial7_RAG_Generator.ipynb b/tutorials/Tutorial7_RAG_Generator.ipynb index ee3f6de5b..53c8ff859 100644 --- a/tutorials/Tutorial7_RAG_Generator.ipynb +++ b/tutorials/Tutorial7_RAG_Generator.ipynb @@ -330,11 +330,12 @@ "source": [ "# Or alternatively use the Pipeline class\n", "from haystack.pipelines import GenerativeQAPipeline\n", + "from haystack.utils import print_answers\n", "\n", "pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)\n", "for question in QUESTIONS:\n", " res = pipe.run(query=question, params={\"Generator\": {\"top_k\": 1}, \"Retriever\": {\"top_k\": 5}})\n", - " print(res)" + " print_answers(res, details=\"minimum\")" ], "outputs": [], "metadata": { diff --git a/tutorials/Tutorial7_RAG_Generator.py b/tutorials/Tutorial7_RAG_Generator.py index c1c5f3eb9..30b0e3e88 100644 --- a/tutorials/Tutorial7_RAG_Generator.py +++ b/tutorials/Tutorial7_RAG_Generator.py @@ -4,6 +4,7 @@ import pandas as pd from haystack import Document from haystack.document_stores import FAISSDocumentStore from haystack.nodes import RAGenerator, DensePassageRetriever +from haystack.utils import print_answers def tutorial7_rag_generator(): @@ -35,7 +36,6 @@ def tutorial7_rag_generator(): ) ) - # Initialize FAISS document store to documents and corresponding index for embeddings # Set `return_embedding` to `True`, so generator doesn't have to perform re-embedding document_store = FAISSDocumentStore( @@ -108,14 +108,14 @@ def tutorial7_rag_generator(): # Print you answer answers = predicted_result["answers"] - print(f'Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'') + print(f' -> Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'') # Or alternatively use the Pipeline class from haystack.pipelines import GenerativeQAPipeline pipe = GenerativeQAPipeline(generator=generator, retriever=retriever) for question in QUESTIONS: res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}}) - print(res) + print_answers(res, details="minimum") if __name__ == "__main__": tutorial7_rag_generator() diff --git a/tutorials/Tutorial8_Preprocessing.py b/tutorials/Tutorial8_Preprocessing.py index 83ded0a70..a9e687c0b 100644 --- a/tutorials/Tutorial8_Preprocessing.py +++ b/tutorials/Tutorial8_Preprocessing.py @@ -80,7 +80,7 @@ def tutorial8_preprocessing(): split_respect_sentence_boundary=True ) docs_default = preprocessor.process(doc_txt) - print(f"n_docs_input: 1\nn_docs_output: {len(docs_default)}") + print(f"\nn_docs_input: 1\nn_docs_output: {len(docs_default)}") """ ## Cleaning @@ -101,13 +101,14 @@ def tutorial8_preprocessing(): preprocessor_nrsb = PreProcessor(split_respect_sentence_boundary=False) docs_nrsb = preprocessor_nrsb.process(doc_txt) - print("RESPECTING SENTENCE BOUNDARY") + print("\nRESPECTING SENTENCE BOUNDARY:") end_text = docs_default[0]["content"][-50:] print("End of document: \"..." + end_text + "\"") - print() - print("NOT RESPECTING SENTENCE BOUNDARY") + + print("\nNOT RESPECTING SENTENCE BOUNDARY:") end_text_nrsb = docs_nrsb[0]["content"][-50:] print("End of document: \"..." + end_text_nrsb + "\"") + print() """ A commonly used strategy to split long documents, especially in the field of Question Answering,