Mirror of https://github.com/deepset-ai/haystack.git
Synced 2025-09-02 04:46:31 +00:00
Improve tutorials' output (#1694)
* Modify __str__ and __repr__ for Document and Answer
* Rename QueryClassifier in Tutorial11
* Improve the output of Tutorial1
* Make the output of Tutorial8 a bit less dense
* Add a print_questions util to print the output of question generating pipelines
* Replace custom printing with the new utility in Tutorial13
* Ensure all output is printed with minimal details in Tutorial14 and add some titles
* Minor change to print_answers
* Make Tutorial3's output the same as Tutorial1
* Add __repr__ to Answer and fix to_dict()
* Fix a bug in the Document and Answer __str__ methods
* Improve print_answers, print_documents and print_questions
* Use print_answers in Tutorial7 and fix a typo in the utils
* Remove a duplicate line in Tutorial12
* Use print_answers in Tutorial4
* Add an explanation of what the documents in the output of the basic QA pipeline are
* Move the fields constant into print_answers
* Normalize all 'minimal' to 'minimum' (they were mixed up)
* Improve the sample output to include all fields from Document and Answer

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
861522b6b1
commit
91cafb49bb
@@ -237,7 +237,35 @@ prediction = pipe.run(
 
 
 ```python
-print_answers(prediction, details="minimal")
+# Now you can either print the object directly...
+from pprint import pprint
+
+pprint(prediction)
+
+# Sample output:
+# {
+#     'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
+#                  <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
+#                  ...
+#                ]
+#     'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
+#                    <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
+#                    ...
+#                  ],
+#     'no_ans_gap': 11.688868522644043,
+#     'node_id': 'Reader',
+#     'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
+#     'query': 'Who is the father of Arya Stark?',
+#     'root_node': 'Query'
+# }
+
+```
+
+
+```python
+# ...or use a util to simplify the output
+# Change `minimum` to `medium` or `all` to raise the level of detail
+print_answers(prediction, details="minimum")
 ```
 
 ## About us
@@ -296,7 +296,7 @@ Below, we define a very naive `QueryClassifier` and show how to use it:
 
 
 ```python
-class QueryClassifier(BaseComponent):
+class CustomQueryClassifier(BaseComponent):
     outgoing_edges = 2
 
     def run(self, query: str):
@@ -307,7 +307,7 @@ class QueryClassifier(BaseComponent):
 
 # Here we build the pipeline
 p_classifier = Pipeline()
-p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"])
+p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"])
 p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
 p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
 p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])
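Note: the hunk above cuts off before the body of `run()`. A minimal sketch of what such a "very naive" classifier can look like is shown below; since the body is not part of this hunk, treat the routing logic (question mark means dense retriever) as an illustrative assumption rather than the tutorial's verbatim code.

```python
# Illustrative sketch only: the real run() body is not shown in this hunk.
from haystack.nodes import BaseComponent


class CustomQueryClassifier(BaseComponent):
    outgoing_edges = 2

    def run(self, query: str):
        # Route natural-language questions to output_2 (wired to the DPR retriever above)
        # and keyword-style queries to output_1 (wired to the Elasticsearch retriever).
        if "?" in query:
            return {}, "output_2"
        return {}, "output_1"
```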
@@ -42,7 +42,8 @@ from tqdm import tqdm
 from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
 from haystack.document_stores import ElasticsearchDocumentStore
 from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
-from haystack.utils import launch_es
+from haystack.utils import launch_es, print_questions
 
 ```
 
 Let's start an Elasticsearch instance with one of the options below:
@@ -98,9 +99,11 @@ which the document can answer.
 
 ```python
 question_generation_pipeline = QuestionGenerationPipeline(question_generator)
-for document in document_store:
+for idx, document in enumerate(document_store):
+
+    print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n")
     result = question_generation_pipeline.run(documents=[document])
-    pprint(result)
+    print_questions(result)
 ```
 
 ## Retriever Question Generation Pipeline
@@ -111,8 +114,10 @@ This pipeline takes a query as input. It retrieves relevant documents and then g
 ```python
 retriever = ElasticsearchRetriever(document_store=document_store)
 rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)
+
+print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n")
 result = rqg_pipeline.run(query="Arya Stark")
-pprint(result)
+print_questions(result)
 ```
 
 ## Question Answer Generation Pipeline
@@ -124,9 +129,11 @@ a Reader model
 ```python
 reader = FARMReader("deepset/roberta-base-squad2")
 qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
-for document in tqdm(document_store):
+for idx, document in enumerate(tqdm(document_store)):
+
+    print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
     result = qag_pipeline.run(documents=[document])
-    pprint(result)
+    print_questions(result)
 ```
 
 ## About us
@@ -161,14 +161,14 @@ res_1 = sklearn_keyword_classifier.run(
     query="Who is the father of Arya Stark?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_2 = sklearn_keyword_classifier.run(
     query="arya stark father"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_2)
+print_answers(res_2, details="minimum")
 
 ```
 
@@ -180,14 +180,14 @@ res_3 = sklearn_keyword_classifier.run(
     query="which country was jon snow filmed ?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_3)
+print_answers(res_3, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_4 = sklearn_keyword_classifier.run(
     query="jon snow country"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_4)
+print_answers(res_4, details="minimum")
 ```
 
 
@@ -197,14 +197,14 @@ res_5 = sklearn_keyword_classifier.run(
     query="who are the younger brothers of arya stark ?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_5)
+print_answers(res_5, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_6 = sklearn_keyword_classifier.run(
     query="arya stark younger brothers"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_6)
+print_answers(res_6, details="minimum")
 ```
 
 ## Transformer Keyword vs Question/Statement Classifier
@@ -234,14 +234,14 @@ res_1 = transformer_keyword_classifier.run(
     query="Who is the father of Arya Stark?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_2 = transformer_keyword_classifier.run(
     query="arya stark father"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_2)
+print_answers(res_2, details="minimum")
 
 ```
 
@@ -253,14 +253,14 @@ res_3 = transformer_keyword_classifier.run(
     query="which country was jon snow filmed ?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_3)
+print_answers(res_3, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_4 = transformer_keyword_classifier.run(
     query="jon snow country"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_4)
+print_answers(res_4, details="minimum")
 ```
 
 
@@ -270,14 +270,14 @@ res_5 = transformer_keyword_classifier.run(
     query="who are the younger brothers of arya stark ?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_5)
+print_answers(res_5, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_6 = transformer_keyword_classifier.run(
     query="arya stark younger brothers"
 )
 print("ES Results" + "\n" + "="*15)
-print_answers(res_6)
+print_answers(res_6, details="minimum")
 ```
 
 ## Question vs Statement Classifier
@@ -305,14 +305,14 @@ res_1 = transformer_question_classifier.run(
     query="Who is the father of Arya Stark?"
 )
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Show only DPR results
 res_2 = transformer_question_classifier.run(
     query="Arya Stark was the daughter of a Lord."
 )
 print("ES Results" + "\n" + "="*15)
-res_2
+print_answers(res_2, details="minimum")
 ```
 
 ## Standalone Query Classifier
@@ -182,7 +182,34 @@ prediction = pipe.run(
 
 
 ```python
-print_answers(prediction, details="minimal")
+# Now you can either print the object directly...
+from pprint import pprint
+
+pprint(prediction)
+
+# Sample output:
+# {
+#     'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
+#                  <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}, context='...' >,
+#                  ...
+#                ]
+#     'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id='d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
+#                    <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
+#                    ...
+#                  ],
+#     'no_ans_gap': 11.688868522644043,
+#     'node_id': 'Reader',
+#     'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
+#     'query': 'Who is the father of Arya Stark?',
+#     'root_node': 'Query'
+# }
+```
+
+
+```python
+# ...or use a util to simplify the output
+# Change `minimum` to `medium` or `all` to raise the level of detail
+print_answers(prediction, details="minimum")
 ```
 
 ## About us
@@ -155,12 +155,10 @@ pipe = FAQPipeline(retriever=retriever)
 
 
 ```python
+from haystack.utils import print_answers
+
 prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
-for a in prediction["answers"]:
-    print(f"Answer: {a.answer}")
-    print(f"Question: {a.meta['query']}")
-    print(f"Score: {a.score}")
-    print("---------------------")
+print_answers(prediction, details="medium")
 ```
 
 ## About us
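For reference, a hedged sketch of what `details="medium"` prints here: per the `fields_to_keep_by_level` mapping added later in this commit, each Answer is reduced to its `answer`, `context` and `score` fields before pretty-printing. The answer text below is a made-up placeholder, not real tutorial output.

```python
# Sketch only: field selection comes from this commit's print_answers utility,
# but the printed values are placeholders.
print_answers(prediction, details="medium")

# Roughly:
# Query: How is the virus spreading?
# Answers:
# [   {   'answer': '...mainly through respiratory droplets...',
#         'context': '...',
#         'score': 0.98}]
```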
@@ -193,11 +193,12 @@ for question in QUESTIONS:
 ```python
 # Or alternatively use the Pipeline class
 from haystack.pipelines import GenerativeQAPipeline
+from haystack.utils import print_answers
 
 pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
 for question in QUESTIONS:
     res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}})
-    print(res)
+    print_answers(res, details="minimum")
 ```
 
 ## About us
@@ -186,10 +186,13 @@ class Document:
                 getattr(other, 'id_hash_keys', None) == self.id_hash_keys)
 
     def __repr__(self):
-        return str(self.to_dict())
+        return f"<Document: {str(self.to_dict())}>"
 
     def __str__(self):
-        return f"content: {self.content[:100]} {'[...]' if len(self.content) > 100 else ''}"
+        # In some cases, self.content is None (therefore not subscriptable)
+        if not self.content:
+            return f"<Document: id={self.id}, content=None>"
+        return f"<Document: id={self.id}, content='{self.content[:100]} {'...' if len(self.content) > 100 else ''}'>"
 
     def __lt__(self, other):
         """ Enable sorting of Documents by score """
@@ -262,7 +265,13 @@ class Answer:
         return self.score < other.score
 
     def __str__(self):
-        return f"answer: {self.answer} \nscore: {self.score} \ncontext: {self.context}"
+        # self.context might be None (therefore not subscriptable)
+        if not self.context:
+            return f"<Answer: answer='{self.answer}', score={self.score}, context=None>"
+        return f"<Answer: answer='{self.answer}', score={self.score}, context='{self.context[:50]}{'...' if len(self.context) > 50 else ''}'>"
+
+    def __repr__(self):
+        return f"<Answer {asdict(self)}>"
 
     def to_dict(self):
         return asdict(self)
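Taken together, the new dunder methods make interactive printing much friendlier. A minimal sketch of the effect is below; the content string is a placeholder, the id is generated at runtime, and the top-level import path is assumed to expose `Document` as it does in Haystack 1.x.

```python
# Rough illustration of the new __str__/__repr__ behaviour (not actual tutorial output).
from haystack import Document

doc = Document(content="Arya Stark is the third child of Lord Eddard Stark.")

# __str__ now yields a compact, angle-bracketed summary, e.g.:
#   <Document: id=..., content='Arya Stark is the third child of Lord Eddard Stark. '>
print(doc)

# __repr__ wraps the full to_dict() payload, e.g.:
#   <Document: {'content': 'Arya Stark is ...', 'content_type': 'text', ...}>
print(repr(doc))
```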
@@ -16,6 +16,7 @@ from haystack.utils.doc_store import (
 from haystack.utils.export_utils import (
     print_answers,
     print_documents,
+    print_questions,
     export_answers_to_csv,
     convert_labels_to_squad,
 )
@@ -1,12 +1,8 @@
 from typing import Dict, Any, List, Optional
 
-import io
-import re
-import time
 import json
 import pprint
 import logging
-import subprocess
 import pandas as pd
 from collections import defaultdict
 
@@ -16,58 +12,98 @@ from haystack.document_stores.sql import DocumentORM
 logger = logging.getLogger(__name__)
 
 
-def print_answers(results: dict, details: str = "all"):
+def print_answers(results: dict, details: str = "all", max_text_len: Optional[int] = None):
     """
-    Utilitiy function to print results of Haystack pipelines
+    Utility function to print results of Haystack pipelines
     :param results: Results from a pipeline
-    :param details: One of ["minimum", "medium", "all]. Defining the level of details to print.
+    :param details: One of "minimum", "medium", "all". Defining the level of details to print.
+    :param max_text_len: shorten lengthy text fields to the maximum allowed length. Set to
+                         None to not cut long text.
     :return: None
     """
-    # TODO: unify the output format of Generator and Reader so that this function doesn't have the try/except
-    # Or implement a class method like PredReader.print() and PredGenerator.print() that handles all this functionality.
-    # This default case is when the answers come from a Reader
-    try:
-        answers = results["answers"]
-        pp = pprint.PrettyPrinter(indent=4)
-        if details in ("minimal", "medium"):
-            if details == "minimal":
-                keys_to_keep = set(["answer", "context"])
-            elif details == "medium":
-                keys_to_keep = set(["answer", "context", "score"])
-
-            # filter the results
-            filtered_answers = []
-            for ans in answers:
-                filtered_answers.append({k: getattr(ans, k) for k in keys_to_keep})
-            pp.pprint(filtered_answers)
-        else:
-            pp.pprint(results)
-    # This fall back case is when the answers come from a Generator
-    except:
-        if details == "minimal":
-            print(f"Query: {results['query']}")
-            for a in results["answers"]:
-                print(f"Answer: {a['answer']}")
-        else:
-            pp.pprint(results)
+    # Defines the fields to keep in the Answer for each detail level
+    fields_to_keep_by_level = {
+        "minimum": ["answer", "context"],
+        "medium": ["answer", "context", "score"]
+    }
+
+    if not "answers" in results.keys():
+        raise ValueError("The results object does not seem to come from a Reader: "
+                         f"it does not contain the 'answers' key, but only: {results.keys()}. "
+                         "Try print_documents or print_questions.")
+
+    if "query" in results.keys():
+        print(f"\nQuery: {results['query']}\nAnswers:")
+
+    answers = results["answers"]
+    pp = pprint.PrettyPrinter(indent=4)
+
+    # Filter the results by detail level
+    filtered_answers = []
+    if details in fields_to_keep_by_level.keys():
+        for ans in answers:
+            filtered_answers.append({k: getattr(ans, k) for k in fields_to_keep_by_level[details]})
+    elif details == "all":
+        filtered_answers = answers
+    else:
+        logging.warn(f"print_answers received details='{details}', which was not understood. "
+                     "Valid values are 'minimum', 'medium', and 'all'. Using 'all'.")
+        filtered_answers = answers
+
+    # Shorten long text fields
+    if max_text_len is not None:
+        for ans in answers:
+            if "context" in ans.keys() and len(ans["context"]) > 50:
+                ans["context"] = ans["context"][:50] + "..."
+
+    pp.pprint(filtered_answers)
 
 
-def print_documents(results: dict, max_text_len: Optional[int] = None, print_meta: bool = False):
-    print(f"Query: {results['query']}")
+def print_documents(results: dict, max_text_len: Optional[int] = None, print_name: bool = True, print_meta: bool = False):
+    """
+    Utility that prints a compressed representation of the documents returned by a pipeline.
+    :param max_text_len: shorten the document's content to a maximum number of chars. if None, does not cut.
+    :param print_name: whether to print the document's name (from the metadata) or not.
+    :param print_meta: whether to print the document's metadata or not.
+    """
+    print(f"\nQuery: {results['query']}\n")
     pp = pprint.PrettyPrinter(indent=4)
-    for d in results["documents"]:
-        print()
-        new_text = d.content[:max_text_len]
-        if len(new_text) != len(d.content):
-            new_text += "..."
-        results = {
-            "name": d.meta.get("name", None),
-            "content": new_text
-        }
+    for doc in results["documents"]:
+        content = doc.content
+        if max_text_len:
+            content = doc.content[:max_text_len] + ("..." if len(doc.content) > max_text_len else "")
+        results = {"content": content}
+        if print_name:
+            results["name"] = doc.meta.get("name", None)
         if print_meta:
-            results["meta"] = d.meta
+            results["meta"] = doc.meta
         pp.pprint(results)
+        print()
+
+
+def print_questions(results: dict):
+    """
+    Utility to print the output of a question generating pipeline in a readable format.
+    """
+    if "generated_questions" in results.keys():
+        print("\nGenerated questions:")
+        for result in results["generated_questions"]:
+            for question in result["questions"]:
+                print(f" - {question}")
+
+    elif "results" in results.keys():
+        print("\nGenerated pairs:")
+        for pair in results["results"]:
+            print(f" - Q:{pair['query']}")
+            for answer in pair["answers"]:
+                print(f" A: {answer.answer}")
+
+    else:
+        raise ValueError("This object does not seem to be the output "
+                         "of a question generating pipeline: does not contain neither "
+                         f"'generated_questions' nor 'results', but only: {results.keys()}. "
+                         " Try `print_answers` or `print_documents`.")
 
 
 def export_answers_to_csv(agg_results: list, output_file):
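Because the rewritten utilities now raise a `ValueError` when handed the wrong result type, here is a hedged sketch of how the tutorials are expected to call them after this commit; the pipeline and document variables (`pipe`, `p_retrieval`, `question_generation_pipeline`, `document`) are assumed to be set up as in the tutorial hunks above.

```python
# Usage sketch only; assumes the pipelines from the tutorials above are already built.
from haystack.utils import print_answers, print_documents, print_questions

# Extractive/generative QA results carry an 'answers' key:
prediction = pipe.run(query="Who is the father of Arya Stark?", params={"Retriever": {"top_k": 10}})
print_answers(prediction, details="minimum")   # or "medium" / "all"

# Document search results carry a 'documents' key:
doc_results = p_retrieval.run(query="Who is the father of Arya Stark?", params={"Retriever": {"top_k": 10}})
print_documents(doc_results, max_text_len=200)

# Question generation results carry 'generated_questions' (or 'results' for QA generation):
qg_results = question_generation_pipeline.run(documents=[document])
print_questions(qg_results)
```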
@@ -547,7 +547,7 @@
 "cell_type": "code",
 "execution_count": null,
 "source": [
-"class QueryClassifier(BaseComponent):\n",
+"class CustomQueryClassifier(BaseComponent):\n",
 "    outgoing_edges = 2\n",
 "\n",
 "    def run(self, query: str):\n",
@@ -558,7 +558,7 @@
 "\n",
 "# Here we build the pipeline\n",
 "p_classifier = Pipeline()\n",
-"p_classifier.add_node(component=QueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n",
+"p_classifier.add_node(component=CustomQueryClassifier(), name=\"QueryClassifier\", inputs=[\"Query\"])\n",
 "p_classifier.add_node(component=es_retriever, name=\"ESRetriever\", inputs=[\"QueryClassifier.output_1\"])\n",
 "p_classifier.add_node(component=dpr_retriever, name=\"DPRRetriever\", inputs=[\"QueryClassifier.output_2\"])\n",
 "p_classifier.add_node(component=reader, name=\"QAReader\", inputs=[\"ESRetriever\", \"DPRRetriever\"])\n",
@@ -2,7 +2,7 @@ from haystack.utils import clean_wiki_text, print_answers, print_documents, fetc
 from pprint import pprint
 from haystack import Pipeline
 from haystack.document_stores import ElasticsearchDocumentStore
-from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, JoinDocuments
+from haystack.nodes import ElasticsearchRetriever, DensePassageRetriever, FARMReader, RAGenerator, BaseComponent, JoinDocuments
 from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline, GenerativeQAPipeline
 
 
@ -35,33 +35,44 @@ def tutorial11_pipelines():
|
|||||||
|
|
||||||
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
|
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
|
||||||
|
|
||||||
######################
|
print()
|
||||||
# Prebuilt Pipelines #
|
print("######################")
|
||||||
######################
|
print("# Prebuilt Pipelines #")
|
||||||
|
print("######################")
|
||||||
|
|
||||||
# Extractive QA Pipeline
|
print()
|
||||||
########################
|
print("# Extractive QA Pipeline")
|
||||||
|
print("########################")
|
||||||
|
|
||||||
|
query="Who is the father of Arya Stark?"
|
||||||
p_extractive_premade = ExtractiveQAPipeline(reader=reader, retriever=es_retriever)
|
p_extractive_premade = ExtractiveQAPipeline(reader=reader, retriever=es_retriever)
|
||||||
res = p_extractive_premade.run(
|
res = p_extractive_premade.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query=query,
|
||||||
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
|
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
|
||||||
)
|
)
|
||||||
print_answers(res, details="minimal")
|
print("\nQuery: ", query)
|
||||||
|
print("Answers:")
|
||||||
|
print_answers(res, details="minimum")
|
||||||
|
|
||||||
# Document Search Pipeline
|
|
||||||
##########################
|
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("# Document Search Pipeline")
|
||||||
|
print("##########################")
|
||||||
|
|
||||||
|
query="Who is the father of Arya Stark?"
|
||||||
p_retrieval = DocumentSearchPipeline(es_retriever)
|
p_retrieval = DocumentSearchPipeline(es_retriever)
|
||||||
res = p_retrieval.run(
|
res = p_retrieval.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query=query,
|
||||||
params={"Retriever": {"top_k": 10}},
|
params={"Retriever": {"top_k": 10}},
|
||||||
|
|
||||||
)
|
)
|
||||||
|
print()
|
||||||
print_documents(res, max_text_len=200)
|
print_documents(res, max_text_len=200)
|
||||||
|
|
||||||
# Generator Pipeline
|
|
||||||
##########################
|
print()
|
||||||
|
print("# Generator Pipeline")
|
||||||
|
print("####################")
|
||||||
|
|
||||||
# We set this to True so that the document store returns document embeddings
|
# We set this to True so that the document store returns document embeddings
|
||||||
# with each document, this is needed by the Generator
|
# with each document, this is needed by the Generator
|
||||||
@ -73,11 +84,12 @@ def tutorial11_pipelines():
|
|||||||
# Generative QA
|
# Generative QA
|
||||||
p_generator = GenerativeQAPipeline(generator=rag_generator, retriever=dpr_retriever)
|
p_generator = GenerativeQAPipeline(generator=rag_generator, retriever=dpr_retriever)
|
||||||
res = p_generator.run(
|
res = p_generator.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query=query,
|
||||||
params={"Retriever": {"top_k": 10}},
|
params={"Retriever": {"top_k": 10}},
|
||||||
|
|
||||||
)
|
)
|
||||||
print_answers(res, details="minimal")
|
print()
|
||||||
|
print_answers(res, details="minimum")
|
||||||
|
|
||||||
# We are setting this to False so that in later pipelines,
|
# We are setting this to False so that in later pipelines,
|
||||||
# we get a cleaner printout
|
# we get a cleaner printout
|
||||||
@ -91,12 +103,14 @@ def tutorial11_pipelines():
|
|||||||
p_retrieval.draw("pipeline_retrieval.png")
|
p_retrieval.draw("pipeline_retrieval.png")
|
||||||
p_generator.draw("pipeline_generator.png")
|
p_generator.draw("pipeline_generator.png")
|
||||||
|
|
||||||
####################
|
print()
|
||||||
# Custom Pipelines #
|
print("####################")
|
||||||
####################
|
print("# Custom Pipelines #")
|
||||||
|
print("####################")
|
||||||
|
|
||||||
# Extractive QA Pipeline
|
print()
|
||||||
########################
|
print("# Extractive QA Pipeline")
|
||||||
|
print("########################")
|
||||||
|
|
||||||
# Custom built extractive QA pipeline
|
# Custom built extractive QA pipeline
|
||||||
p_extractive = Pipeline()
|
p_extractive = Pipeline()
|
||||||
@ -104,15 +118,20 @@ def tutorial11_pipelines():
|
|||||||
p_extractive.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
p_extractive.add_node(component=reader, name="Reader", inputs=["Retriever"])
|
||||||
|
|
||||||
# Now we can run it
|
# Now we can run it
|
||||||
|
query="Who is the father of Arya Stark?"
|
||||||
res = p_extractive.run(
|
res = p_extractive.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query=query,
|
||||||
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
|
params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
|
||||||
)
|
)
|
||||||
print_answers(res, details="minimal")
|
print("\nQuery: ", query)
|
||||||
|
print("Answers:")
|
||||||
|
print_answers(res, details="minimum")
|
||||||
p_extractive.draw("pipeline_extractive.png")
|
p_extractive.draw("pipeline_extractive.png")
|
||||||
|
|
||||||
# Ensembled Retriever Pipeline
|
|
||||||
##############################
|
print()
|
||||||
|
print("# Ensembled Retriever Pipeline")
|
||||||
|
print("##############################")
|
||||||
|
|
||||||
# Create ensembled pipeline
|
# Create ensembled pipeline
|
||||||
p_ensemble = Pipeline()
|
p_ensemble = Pipeline()
|
||||||
@ -123,22 +142,27 @@ def tutorial11_pipelines():
|
|||||||
p_ensemble.draw("pipeline_ensemble.png")
|
p_ensemble.draw("pipeline_ensemble.png")
|
||||||
|
|
||||||
# Run pipeline
|
# Run pipeline
|
||||||
|
query="Who is the father of Arya Stark?"
|
||||||
res = p_ensemble.run(
|
res = p_ensemble.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query="Who is the father of Arya Stark?",
|
||||||
params={"ESRetriever": {"top_k": 5}, "DPRRetriever": {"top_k": 5}},
|
params={"ESRetriever": {"top_k": 5}, "DPRRetriever": {"top_k": 5}},
|
||||||
|
|
||||||
)
|
)
|
||||||
print_answers(res, details="minimal")
|
print("\nQuery: ", query)
|
||||||
|
print("Answers:")
|
||||||
|
print_answers(res, details="minimum")
|
||||||
|
|
||||||
# Query Classification Pipeline
|
|
||||||
###############################
|
print()
|
||||||
|
print("# Query Classification Pipeline")
|
||||||
|
print("###############################")
|
||||||
|
|
||||||
# Decision Nodes help you route your data so that only certain branches of your `Pipeline` are run.
|
# Decision Nodes help you route your data so that only certain branches of your `Pipeline` are run.
|
||||||
# Though this looks very similar to the ensembled pipeline shown above,
|
# Though this looks very similar to the ensembled pipeline shown above,
|
||||||
# the key difference is that only one of the retrievers is run for each request.
|
# the key difference is that only one of the retrievers is run for each request.
|
||||||
# By contrast both retrievers are always run in the ensembled approach.
|
# By contrast both retrievers are always run in the ensembled approach.
|
||||||
|
|
||||||
class QueryClassifier():
|
class CustomQueryClassifier(BaseComponent):
|
||||||
outgoing_edges = 2
|
outgoing_edges = 2
|
||||||
|
|
||||||
def run(self, query):
|
def run(self, query):
|
||||||
@ -149,25 +173,32 @@ def tutorial11_pipelines():
|
|||||||
|
|
||||||
# Here we build the pipeline
|
# Here we build the pipeline
|
||||||
p_classifier = Pipeline()
|
p_classifier = Pipeline()
|
||||||
p_classifier.add_node(component=QueryClassifier(), name="QueryClassifier", inputs=["Query"])
|
p_classifier.add_node(component=CustomQueryClassifier(), name="QueryClassifier", inputs=["Query"])
|
||||||
p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
|
p_classifier.add_node(component=es_retriever, name="ESRetriever", inputs=["QueryClassifier.output_1"])
|
||||||
p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
|
p_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["QueryClassifier.output_2"])
|
||||||
p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])
|
p_classifier.add_node(component=reader, name="QAReader", inputs=["ESRetriever", "DPRRetriever"])
|
||||||
p_classifier.draw("pipeline_classifier.png")
|
p_classifier.draw("pipeline_classifier.png")
|
||||||
|
|
||||||
# Run only the dense retriever on the full sentence query
|
# Run only the dense retriever on the full sentence query
|
||||||
|
query="Who is the father of Arya Stark?"
|
||||||
res_1 = p_classifier.run(
|
res_1 = p_classifier.run(
|
||||||
query="Who is the father of Arya Stark?",
|
query=query,
|
||||||
)
|
)
|
||||||
print("DPR Results" + "\n" + "="*15)
|
print()
|
||||||
print_answers(res_1)
|
print("\nQuery: ", query)
|
||||||
|
print(" * DPR Answers:")
|
||||||
|
print_answers(res_1, details="minimum")
|
||||||
|
|
||||||
|
|
||||||
# Run only the sparse retriever on a keyword based query
|
# Run only the sparse retriever on a keyword based query
|
||||||
|
query="Arya Stark father"
|
||||||
res_2 = p_classifier.run(
|
res_2 = p_classifier.run(
|
||||||
query="Arya Stark father",
|
query=query,
|
||||||
)
|
)
|
||||||
print("ES Results" + "\n" + "="*15)
|
print()
|
||||||
print_answers(res_2)
|
print("\nQuery: ", query)
|
||||||
|
print(" * ES Answers:")
|
||||||
|
print_answers(res_2, details="minimum")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@@ -91,7 +91,6 @@ def tutorial12_lfqa():
     print(f"Query: {query_2}")
     print(f"Answer: {result_2['answers'][0]}")
     print()
-    pipe.run(query=query_2, params={"Retriever": {"top_k": 1}})
 
 
 if __name__ == "__main__":
@ -66,7 +66,7 @@
|
|||||||
"from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader\n",
|
"from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader\n",
|
||||||
"from haystack.document_stores import ElasticsearchDocumentStore\n",
|
"from haystack.document_stores import ElasticsearchDocumentStore\n",
|
||||||
"from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline\n",
|
"from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline\n",
|
||||||
"from haystack.utils import launch_es"
|
"from haystack.utils import launch_es, print_questions\n"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -188,9 +188,11 @@
|
|||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"source": [
|
"source": [
|
||||||
"question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
|
"question_generation_pipeline = QuestionGenerationPipeline(question_generator)\n",
|
||||||
"for document in document_store:\n",
|
"for idx, document in enumerate(document_store):\n",
|
||||||
|
" \n",
|
||||||
|
" print(f\"\\n * Generating questions for document {idx}: {document.content[:100]}...\\n\")\n",
|
||||||
" result = question_generation_pipeline.run(documents=[document])\n",
|
" result = question_generation_pipeline.run(documents=[document])\n",
|
||||||
" pprint(result)"
|
" print_questions(result)"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -220,8 +222,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"retriever = ElasticsearchRetriever(document_store=document_store)\n",
|
"retriever = ElasticsearchRetriever(document_store=document_store)\n",
|
||||||
"rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)\n",
|
"rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)\n",
|
||||||
|
"\n",
|
||||||
|
"print(f\"\\n * Generating questions for documents matching the query 'Arya Stark'\\n\")\n",
|
||||||
"result = rqg_pipeline.run(query=\"Arya Stark\")\n",
|
"result = rqg_pipeline.run(query=\"Arya Stark\")\n",
|
||||||
"pprint(result)"
|
"print_questions(result)"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -252,9 +256,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"reader = FARMReader(\"deepset/roberta-base-squad2\")\n",
|
"reader = FARMReader(\"deepset/roberta-base-squad2\")\n",
|
||||||
"qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)\n",
|
"qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)\n",
|
||||||
"for document in tqdm(document_store):\n",
|
"for idx, document in enumerate(tqdm(document_store)):\n",
|
||||||
|
"\n",
|
||||||
|
" print(f\"\\n * Generating questions and answers for document {idx}: {document.content[:100]}...\\n\")\n",
|
||||||
" result = qag_pipeline.run(documents=[document])\n",
|
" result = qag_pipeline.run(documents=[document])\n",
|
||||||
" pprint(result)"
|
" print_questions(result)"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -3,7 +3,7 @@ from pprint import pprint
|
|||||||
from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
|
from haystack.nodes import QuestionGenerator, ElasticsearchRetriever, FARMReader
|
||||||
from haystack.document_stores import ElasticsearchDocumentStore
|
from haystack.document_stores import ElasticsearchDocumentStore
|
||||||
from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
|
from haystack.pipelines import QuestionGenerationPipeline, RetrieverQuestionGenerationPipeline, QuestionAnswerGenerationPipeline
|
||||||
from haystack.utils import launch_es
|
from haystack.utils import launch_es, print_questions
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This is a bare bones tutorial showing what is possible with the QuestionGenerator Node which automatically generates
|
This is a bare bones tutorial showing what is possible with the QuestionGenerator Node which automatically generates
|
||||||
@ -34,20 +34,31 @@ which the the document can answer.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# QuestionGenerationPipeline
|
# QuestionGenerationPipeline
|
||||||
|
print("\nQuestionGenerationPipeline")
|
||||||
|
print("==========================")
|
||||||
|
|
||||||
question_generation_pipeline = QuestionGenerationPipeline(question_generator)
|
question_generation_pipeline = QuestionGenerationPipeline(question_generator)
|
||||||
for document in document_store:
|
for idx, document in enumerate(document_store):
|
||||||
|
|
||||||
|
print(f"\n * Generating questions for document {idx}: {document.content[:100]}...\n")
|
||||||
result = question_generation_pipeline.run(documents=[document])
|
result = question_generation_pipeline.run(documents=[document])
|
||||||
pprint(result)
|
print_questions(result)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This pipeline takes a query as input. It retrievers relevant documents and then generates questions based on these.
|
This pipeline takes a query as input. It retrievers relevant documents and then generates questions based on these.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# RetrieverQuestionGenerationPipeline
|
# RetrieverQuestionGenerationPipeline
|
||||||
|
print("\RetrieverQuestionGenerationPipeline")
|
||||||
|
print("==================================")
|
||||||
|
|
||||||
retriever = ElasticsearchRetriever(document_store=document_store)
|
retriever = ElasticsearchRetriever(document_store=document_store)
|
||||||
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)
|
rqg_pipeline = RetrieverQuestionGenerationPipeline(retriever, question_generator)
|
||||||
|
|
||||||
|
print(f"\n * Generating questions for documents matching the query 'Arya Stark'\n")
|
||||||
result = rqg_pipeline.run(query="Arya Stark")
|
result = rqg_pipeline.run(query="Arya Stark")
|
||||||
pprint(result)
|
print_questions(result)
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
This pipeline takes a document as input, generates questions on it, and attempts to answer these questions using
|
This pipeline takes a document as input, generates questions on it, and attempts to answer these questions using
|
||||||
@ -55,11 +66,17 @@ a Reader model
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# QuestionAnswerGenerationPipeline
|
# QuestionAnswerGenerationPipeline
|
||||||
|
print("\QuestionAnswerGenerationPipeline")
|
||||||
|
print("===============================")
|
||||||
|
|
||||||
reader = FARMReader("deepset/roberta-base-squad2")
|
reader = FARMReader("deepset/roberta-base-squad2")
|
||||||
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
|
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)
|
||||||
for document in tqdm(document_store):
|
for idx, document in enumerate(tqdm(document_store)):
|
||||||
|
|
||||||
|
print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
|
||||||
result = qag_pipeline.run(documents=[document])
|
result = qag_pipeline.run(documents=[document])
|
||||||
pprint(result)
|
print_questions(result)
|
||||||
|
|
||||||
|
|
||||||
# This Haystack script was made with love by deepset in Berlin, Germany
|
# This Haystack script was made with love by deepset in Berlin, Germany
|
||||||
# Haystack: https://github.com/deepset-ai/haystack
|
# Haystack: https://github.com/deepset-ai/haystack
|
||||||
|
@ -1567,14 +1567,14 @@
|
|||||||
" query=\"Who is the father of Arya Stark?\"\n",
|
" query=\"Who is the father of Arya Stark?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_1)\n",
|
"print_answers(res_1, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_2 = sklearn_keyword_classifier.run(\n",
|
"res_2 = sklearn_keyword_classifier.run(\n",
|
||||||
" query=\"arya stark father\"\n",
|
" query=\"arya stark father\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_2)\n"
|
"print_answers(res_2, details=\"minimum\")\n"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1591,14 +1591,14 @@
|
|||||||
" query=\"which country was jon snow filmed ?\"\n",
|
" query=\"which country was jon snow filmed ?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_3)\n",
|
"print_answers(res_3, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_4 = sklearn_keyword_classifier.run(\n",
|
"res_4 = sklearn_keyword_classifier.run(\n",
|
||||||
" query=\"jon snow country\"\n",
|
" query=\"jon snow country\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_4)"
|
"print_answers(res_4, details=\"minimum\")"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1614,14 +1614,14 @@
|
|||||||
" query=\"who are the younger brothers of arya stark ?\"\n",
|
" query=\"who are the younger brothers of arya stark ?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_5)\n",
|
"print_answers(res_5, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_6 = sklearn_keyword_classifier.run(\n",
|
"res_6 = sklearn_keyword_classifier.run(\n",
|
||||||
" query=\"arya stark younger brothers\"\n",
|
" query=\"arya stark younger brothers\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_6)"
|
"print_answers(res_6, details=\"minimum\")"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1670,14 +1670,14 @@
|
|||||||
" query=\"Who is the father of Arya Stark?\"\n",
|
" query=\"Who is the father of Arya Stark?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_1)\n",
|
"print_answers(res_1, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_2 = transformer_keyword_classifier.run(\n",
|
"res_2 = transformer_keyword_classifier.run(\n",
|
||||||
" query=\"arya stark father\"\n",
|
" query=\"arya stark father\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_2)\n"
|
"print_answers(res_2, details=\"minimum\")\n"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1694,14 +1694,14 @@
|
|||||||
" query=\"which country was jon snow filmed ?\"\n",
|
" query=\"which country was jon snow filmed ?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_3)\n",
|
"print_answers(res_3, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_4 = transformer_keyword_classifier.run(\n",
|
"res_4 = transformer_keyword_classifier.run(\n",
|
||||||
" query=\"jon snow country\"\n",
|
" query=\"jon snow country\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_4)"
|
"print_answers(res_4, details=\"minimum\")"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1717,14 +1717,14 @@
|
|||||||
" query=\"who are the younger brothers of arya stark ?\"\n",
|
" query=\"who are the younger brothers of arya stark ?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_5)\n",
|
"print_answers(res_5, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Run only the sparse retriever on a keyword based query\n",
|
"# Run only the sparse retriever on a keyword based query\n",
|
||||||
"res_6 = transformer_keyword_classifier.run(\n",
|
"res_6 = transformer_keyword_classifier.run(\n",
|
||||||
" query=\"arya stark younger brothers\"\n",
|
" query=\"arya stark younger brothers\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_6)"
|
"print_answers(res_6, details=\"minimum\")"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@ -1771,14 +1771,14 @@
|
|||||||
" query=\"Who is the father of Arya Stark?\"\n",
|
" query=\"Who is the father of Arya Stark?\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"DPR Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"print_answers(res_1)\n",
|
"print_answers(res_1, details=\"minimum\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Show only DPR results\n",
|
"# Show only DPR results\n",
|
||||||
"res_2 = transformer_question_classifier.run(\n",
|
"res_2 = transformer_question_classifier.run(\n",
|
||||||
" query=\"Arya Stark was the daughter of a Lord.\"\n",
|
" query=\"Arya Stark was the daughter of a Lord.\"\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
"print(\"ES Results\" + \"\\n\" + \"=\"*15)\n",
|
||||||
"res_2"
|
"print_answers(res_2, details=\"minimum\")"
|
||||||
],
|
],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -35,7 +35,9 @@ def tutorial14_query_classifier():
|
|||||||
|
|
||||||
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
|
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("Sklearn keyword classifier")
|
||||||
|
print("==========================")
|
||||||
# Here we build the pipeline
|
# Here we build the pipeline
|
||||||
sklearn_keyword_classifier = Pipeline()
|
sklearn_keyword_classifier = Pipeline()
|
||||||
sklearn_keyword_classifier.add_node(component=SklearnQueryClassifier(), name="QueryClassifier", inputs=["Query"])
|
sklearn_keyword_classifier.add_node(component=SklearnQueryClassifier(), name="QueryClassifier", inputs=["Query"])
|
||||||
@@ -48,44 +50,53 @@ def tutorial14_query_classifier():
 res_1 = sklearn_keyword_classifier.run(
 query="Who is the father of Arya Stark?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_2 = sklearn_keyword_classifier.run(
 query="arya stark father",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_2)
+print_answers(res_2, details="minimum")
 
 # Run only the dense retriever on the full sentence query
 res_3 = sklearn_keyword_classifier.run(
 query="which country was jon snow filmed ?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_3)
+print_answers(res_3, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_4 = sklearn_keyword_classifier.run(
 query="jon snow country",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_4)
+print_answers(res_4, details="minimum")
 
 # Run only the dense retriever on the full sentence query
 res_5 = sklearn_keyword_classifier.run(
 query="who are the younger brothers of arya stark ?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_5)
+print_answers(res_5, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_6 = sklearn_keyword_classifier.run(
 query="arya stark younger brothers",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_6)
+print_answers(res_6, details="minimum")
 
+print()
+print("Transformer keyword classifier")
+print("==============================")
 # Here we build the pipeline
 transformer_keyword_classifier = Pipeline()
 transformer_keyword_classifier.add_node(component=TransformersQueryClassifier(), name="QueryClassifier", inputs=["Query"])
@@ -98,43 +109,53 @@ def tutorial14_query_classifier():
 res_1 = transformer_keyword_classifier.run(
 query="Who is the father of Arya Stark?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_2 = transformer_keyword_classifier.run(
 query="arya stark father",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_2)
+print_answers(res_2, details="minimum")
 
 # Run only the dense retriever on the full sentence query
 res_3 = transformer_keyword_classifier.run(
 query="which country was jon snow filmed ?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_3)
+print_answers(res_3, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_4 = transformer_keyword_classifier.run(
 query="jon snow country",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_4)
+print_answers(res_4, details="minimum")
 
 # Run only the dense retriever on the full sentence query
 res_5 = transformer_keyword_classifier.run(
 query="who are the younger brothers of arya stark ?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_5)
+print_answers(res_5, details="minimum")
 
 # Run only the sparse retriever on a keyword based query
 res_6 = transformer_keyword_classifier.run(
 query="arya stark younger brothers",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-print_answers(res_6)
+print_answers(res_6, details="minimum")
 
+print()
+print("Transformer question classifier")
+print("===============================")
+
 # Here we build the pipeline
 transformer_question_classifier = Pipeline()
@@ -147,15 +168,17 @@ def tutorial14_query_classifier():
 res_1 = transformer_question_classifier.run(
 query="Who is the father of Arya Stark?",
 )
+print("\n===============================")
 print("DPR Results" + "\n" + "="*15)
-print_answers(res_1)
+print_answers(res_1, details="minimum")
 
 # Show only DPR results
 res_2 = transformer_question_classifier.run(
 query="Arya Stark was the daughter of a Lord.",
 )
+print("\n===============================")
 print("ES Results" + "\n" + "="*15)
-res_2
+print_answers(res_2, details="minimum")
 
 # Here we create the keyword vs question/statement query classifier
 
@@ -368,7 +368,38 @@
 "cell_type": "code",
 "execution_count": null,
 "source": [
-"print_answers(prediction, details=\"minimal\")"
+"# Now you can either print the object directly...\n",
+"from pprint import pprint\n",
+"\n",
+"pprint(prediction)\n",
+"\n",
+"# Sample output: \n",
+"# {\n",
+"# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
+"# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
+"# ...\n",
+"# ]\n",
+"# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\\n===\\'\\'A Game of Thrones\\'\\'===\\nSansa Stark begins the novel by being betrothed to Crown ...'>,\n",
+"# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\\n===Season 2===\\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,\n",
+"# ...\n",
+"# ],\n",
+"# 'no_ans_gap': 11.688868522644043,\n",
+"# 'node_id': 'Reader',\n",
+"# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n",
+"# 'query': 'Who is the father of Arya Stark?',\n",
+"# 'root_node': 'Query'\n",
+"# }\n"
+],
+"outputs": [],
+"metadata": {}
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"source": [
+"# ...or use a util to simplify the output\n",
+"# Change `minimum` to `medium` or `all` to raise the level of detail\n",
+"print_answers(prediction, details=\"minimum\")"
 ],
 "outputs": [],
 "metadata": {
@@ -134,7 +134,37 @@ def tutorial1_basic_qa_pipeline():
 # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
 # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
 
-print_answers(prediction, details="minimal")
+# Now you can either print the object directly
+print("\n\nRaw object:\n")
+from pprint import pprint
+pprint(prediction)
+
+# Sample output:
+# {
+# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,
+# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,
+# ...
+# ]
+# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
+# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
+# ...
+# ],
+# 'no_ans_gap': 11.688868522644043,
+# 'node_id': 'Reader',
+# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
+# 'query': 'Who is the father of Arya Stark?',
+# 'root_node': 'Query'
+# }
+
+# Note that the documents contained in the above object are the documents filtered by the Retriever from
+# the document store. Although the answers were extracted from these documents, it's possible that many
+# answers were taken from a single one of them, and that some of the documents were not source of any answer.
+
+# Or use a util to simplify the output
+# Change `minimum` to `medium` or `all` to raise the level of detail
+print("\n\nSimplified output:\n")
+print_answers(prediction, details="minimum")
+
 
 
 if __name__ == "__main__":
@@ -358,11 +358,42 @@
 "outputs": [],
 "metadata": {}
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"source": [
+"# Now you can either print the object directly...\n",
+"from pprint import pprint\n",
+"\n",
+"pprint(prediction)\n",
+"\n",
+"# Sample output: \n",
+"# {\n",
+"# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
+"# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,\n",
+"# ...\n",
+"# ]\n",
+"# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\\n===\\'\\'A Game of Thrones\\'\\'===\\nSansa Stark begins the novel by being betrothed to Crown ...'>,\n",
+"# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\\n===Season 2===\\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,\n",
+"# ...\n",
+"# ],\n",
+"# 'no_ans_gap': 11.688868522644043,\n",
+"# 'node_id': 'Reader',\n",
+"# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},\n",
+"# 'query': 'Who is the father of Arya Stark?',\n",
+"# 'root_node': 'Query'\n",
+"# }"
+],
+"outputs": [],
+"metadata": {}
+},
 {
 "cell_type": "code",
 "execution_count": 11,
 "source": [
-"print_answers(prediction, details=\"minimal\")"
+"# ...or use a util to simplify the output\n",
+"# Change `minimum` to `medium` or `all` to raise the level of detail\n",
+"print_answers(prediction, details=\"minimum\")"
 ],
 "outputs": [
 {
@@ -101,7 +101,36 @@ def tutorial3_basic_qa_pipeline_without_elasticsearch():
 # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
 # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
 
-print_answers(prediction, details="minimal")
+# Now you can either print the object directly
+print("\n\nRaw object:\n")
+from pprint import pprint
+pprint(prediction)
+
+# Sample output:
+# {
+# 'answers': [ <Answer: answer='Eddard', type='extractive', score=0.9919578731060028, offsets_in_document=[{'start': 608, 'end': 615}], offsets_in_context=[{'start': 72, 'end': 79}], document_id='cc75f739897ecbf8c14657b13dda890e', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,
+# <Answer: answer='Ned', type='extractive', score=0.9767240881919861, offsets_in_document=[{'start': 3687, 'end': 3801}], offsets_in_context=[{'start': 18, 'end': 132}], document_id='9acf17ec9083c4022f69eb4a37187080', meta={'name': '454_Music_of_Game_of_Thrones.txt'}}, context='...' >,
+# ...
+# ]
+# 'documents': [ <Document: content_type='text', score=0.8034909798951382, meta={'name': '332_Sansa_Stark.txt'}, embedding=None, id=d1f36ec7170e4c46cde65787fe125dfe', content='\n===\'\'A Game of Thrones\'\'===\nSansa Stark begins the novel by being betrothed to Crown ...'>,
+# <Document: content_type='text', score=0.8002150354529785, meta={'name': '191_Gendry.txt'}, embedding=None, id='dd4e070a22896afa81748d6510006d2', 'content='\n===Season 2===\nGendry travels North with Yoren and other Night's Watch recruits, including Arya ...'>,
+# ...
+# ],
+# 'no_ans_gap': 11.688868522644043,
+# 'node_id': 'Reader',
+# 'params': {'Reader': {'top_k': 5}, 'Retriever': {'top_k': 5}},
+# 'query': 'Who is the father of Arya Stark?',
+# 'root_node': 'Query'
+# }
+
+# Note that the documents contained in the above object are the documents filtered by the Retriever from
+# the document store. Although the answers were extracted from these documents, it's possible that many
+# answers were taken from a single one of them, and that some of the documents were not source of any answer.
+
+# Or use a util to simplify the output
+# Change `minimum` to `medium` or `all` to raise the level of detail
+print("\n\nSimplified output:\n")
+print_answers(prediction, details="minimum")
 
 
 if __name__ == "__main__":
@@ -265,12 +265,10 @@
 "cell_type": "code",
 "execution_count": null,
 "source": [
+"from haystack.utils import print_answers\n",
+"\n",
 "prediction = pipe.run(query=\"How is the virus spreading?\", params={\"Retriever\": {\"top_k\": 10}})\n",
-"for a in prediction[\"answers\"]:\n",
-" print(f\"Answer: {a.answer}\")\n",
-" print(f\"Question: {a.meta['query']}\")\n",
-" print(f\"Score: {a.score}\")\n",
-" print(\"---------------------\")"
+"print_answers(prediction, details=\"medium\")"
 ],
 "outputs": [],
 "metadata": {
@@ -1,7 +1,7 @@
 from haystack.document_stores import ElasticsearchDocumentStore
 
 from haystack.nodes import EmbeddingRetriever
-from haystack.utils import launch_es
+from haystack.utils import launch_es, print_answers
 import pandas as pd
 import requests
 import logging
@@ -72,11 +72,7 @@ def tutorial4_faq_style_qa():
 pipe = FAQPipeline(retriever=retriever)
 
 prediction = pipe.run(query="How is the virus spreading?", params={"Retriever": {"top_k": 10}})
-for a in prediction["answers"]:
-print(f"Answer: {a.answer}")
-print(f"Question: {a.meta['query']}")
-print(f"Score: {a.score}")
-print("---------------------")
+print_answers(prediction, details="medium")
 
 
 if __name__ == "__main__":
@@ -67,7 +67,7 @@ def tutorial6_better_retrieval_via_dpr():
 # prediction = pipe.run(query="Who created the Dothraki vocabulary?", params={"Reader": {"top_k": 5}})
 # prediction = pipe.run(query="Who is the sister of Sansa?", params={"Reader": {"top_k": 5}})
 
-print_answers(prediction, details="minimal")
+print_answers(prediction, details="minimum")
 
 
 if __name__ == "__main__":
@@ -330,11 +330,12 @@
 "source": [
 "# Or alternatively use the Pipeline class\n",
 "from haystack.pipelines import GenerativeQAPipeline\n",
+"from haystack.utils import print_answers\n",
 "\n",
 "pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)\n",
 "for question in QUESTIONS:\n",
 " res = pipe.run(query=question, params={\"Generator\": {\"top_k\": 1}, \"Retriever\": {\"top_k\": 5}})\n",
-" print(res)"
+" print_answers(res, details=\"minimum\")"
 ],
 "outputs": [],
 "metadata": {
@@ -4,6 +4,7 @@ import pandas as pd
 from haystack import Document
 from haystack.document_stores import FAISSDocumentStore
 from haystack.nodes import RAGenerator, DensePassageRetriever
+from haystack.utils import print_answers
 
 
 def tutorial7_rag_generator():
@@ -35,7 +36,6 @@ def tutorial7_rag_generator():
 )
 )
 
-
 # Initialize FAISS document store to documents and corresponding index for embeddings
 # Set `return_embedding` to `True`, so generator doesn't have to perform re-embedding
 document_store = FAISSDocumentStore(
@@ -108,14 +108,14 @@ def tutorial7_rag_generator():
 
 # Print you answer
 answers = predicted_result["answers"]
-print(f'Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
+print(f' -> Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
 
 # Or alternatively use the Pipeline class
 from haystack.pipelines import GenerativeQAPipeline
 pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
 for question in QUESTIONS:
 res = pipe.run(query=question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": 5}})
-print(res)
+print_answers(res, details="minimum")
 
 if __name__ == "__main__":
 tutorial7_rag_generator()
@@ -80,7 +80,7 @@ def tutorial8_preprocessing():
 split_respect_sentence_boundary=True
 )
 docs_default = preprocessor.process(doc_txt)
-print(f"n_docs_input: 1\nn_docs_output: {len(docs_default)}")
+print(f"\nn_docs_input: 1\nn_docs_output: {len(docs_default)}")
 
 """
 ## Cleaning
@@ -101,13 +101,14 @@ def tutorial8_preprocessing():
 preprocessor_nrsb = PreProcessor(split_respect_sentence_boundary=False)
 docs_nrsb = preprocessor_nrsb.process(doc_txt)
 
-print("RESPECTING SENTENCE BOUNDARY")
+print("\nRESPECTING SENTENCE BOUNDARY:")
 end_text = docs_default[0]["content"][-50:]
 print("End of document: \"..." + end_text + "\"")
-print()
-print("NOT RESPECTING SENTENCE BOUNDARY")
+
+print("\nNOT RESPECTING SENTENCE BOUNDARY:")
 end_text_nrsb = docs_nrsb[0]["content"][-50:]
 print("End of document: \"..." + end_text_nrsb + "\"")
+print()
 
 """
 A commonly used strategy to split long documents, especially in the field of Question Answering,