Update tutorials (torch versions, ES version, replace Finder with Pipeline) (#814)
* remove manual torch install on colab
* update elasticsearch version everywhere to 7.9.2
* fix FAQPipeline
* update tutorials with new pipelines
* Add latest docstring and tutorial changes
* revert faqpipeline change. fix field names in tutorial 4
* Add latest docstring and tutorial changes

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

parent ac9f92466f, commit e91518ee00
@@ -44,13 +44,11 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
 
 ```
 
 
 ```python
-from haystack import Finder
 from haystack.preprocessor.cleaning import clean_wiki_text
 from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
 from haystack.reader.farm import FARMReader
@@ -74,19 +72,19 @@ You can start Elasticsearch on your local machine instance using Docker. If Dock
 
 ```python
 # Recommended: Start Elasticsearch using Docker
-#! docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2
+#! docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2
 ```
 
 
 ```python
 # In Colab / No Docker environments: Start Elasticsearch from source
-! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q
-! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz
-! chown -R daemon:daemon elasticsearch-7.6.2
+! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
+! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
+! chown -R daemon:daemon elasticsearch-7.9.2
 
 import os
 from subprocess import Popen, PIPE, STDOUT
-es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],
+es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
                    stdout=PIPE, stderr=STDOUT,
                    preexec_fn=lambda: os.setuid(1)  # as daemon
                   )
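The tutorial snippet above launches Elasticsearch and simply moves on; elsewhere in this commit a fixed `time.sleep` is used. A minimal sketch (not part of the commit, assuming the default `localhost:9200` endpoint) that polls the HTTP API instead of sleeping a fixed number of seconds:

```python
# Sketch: block until the Elasticsearch instance launched above responds,
# instead of sleeping for a fixed number of seconds.
import time
import requests

for _ in range(30):
    try:
        if requests.get("http://localhost:9200").status_code == 200:
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(1)
else:
    raise RuntimeError("Elasticsearch did not come up within 30 seconds")
```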
@@ -206,13 +204,17 @@ reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=Tr
 # reader = TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
 ```
 
-### Finder
+### Pipeline
 
-The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
+With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.
+Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.
+To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.
+You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd).
 
 
 ```python
-finder = Finder(reader, retriever)
+from haystack.pipeline import ExtractiveQAPipeline
+pipe = ExtractiveQAPipeline(reader, retriever)
 ```
 
 ## Voilà! Ask a question!
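`ExtractiveQAPipeline` is shorthand for a two-node graph. The same wiring can be spelled out with the generic `Pipeline` class from the module this commit switches the tutorials to; a sketch, with node names chosen by us and the `add_node` API assumed from that module:

```python
# Sketch: the explicit DAG behind ExtractiveQAPipeline(reader, retriever).
# "ESRetriever" and "Reader" are arbitrary node labels; "Query" is the root node.
from haystack.pipeline import Pipeline

p = Pipeline()
p.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
p.add_node(component=reader, name="Reader", inputs=["ESRetriever"])
prediction = p.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
```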
@@ -221,13 +223,13 @@ finder = Finder(reader, retriever)
 ```python
 # You can configure how many candidates the reader and retriever shall return
 # The higher top_k_retriever, the better (but also the slower) your answers.
-prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
+prediction = pipe.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
 ```
 
 
 ```python
-# prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
-# prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)
+# prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_reader=5)
+# prediction = pipe.run(query="Who is the sister of Sansa?", top_k_reader=5)
 ```
 
 
@@ -39,8 +39,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 

@@ -39,8 +39,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 
@@ -153,13 +151,17 @@ reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=Tr
 # reader = TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
 ```
 
-### Finder
+### Pipeline
 
-The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
+With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.
+Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.
+To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.
+You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd).
 
 
 ```python
-finder = Finder(reader, retriever)
+from haystack.pipeline import ExtractiveQAPipeline
+pipe = ExtractiveQAPipeline(reader, retriever)
 ```
 
 ## Voilà! Ask a question!
@@ -167,14 +169,14 @@ finder = Finder(reader, retriever)
 
 ```python
 # You can configure how many candidates the reader and retriever shall return
 # The higher top_k_retriever, the better (but also the slower) your answers.
-prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
+prediction = pipe.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
 ```
 
 
 ```python
-# prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
-# prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)
+# prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_reader=5)
+# prediction = pipe.run(query="Who is the sister of Sansa?", top_k_reader=5)
 ```
 
 
@@ -47,8 +47,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 
@@ -136,7 +134,7 @@ print(df.head())
 # Get embeddings for our questions from the FAQs
 questions = list(df["question"].values)
 df["question_emb"] = retriever.embed_queries(texts=questions)
-df = df.rename(columns={"answer": "text"})
+df = df.rename(columns={"question": "text"})
 
 # Convert Dataframe to list of dicts and index them in our DocumentStore
 docs_to_index = df.to_dict(orient="records")
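This rename is the "fix field names in tutorial 4" from the commit message: for FAQ-style search the stored *question* must become the document `text` that incoming queries are matched against, while the answer rides along in the document dict. A sketch of what one indexed record ends up looking like (the answer string is illustrative; embeddings omitted for brevity):

```python
# Sketch: shape of an FAQ document after the rename above.
# The retriever matches queries against "text" (the FAQ question);
# "answer" is carried along as extra document data.
import pandas as pd

df = pd.DataFrame([{"question": "How is the virus spreading?",
                    "answer": "Mainly through close contact."}])  # illustrative values
df = df.rename(columns={"question": "text"})
print(df.to_dict(orient="records"))
# [{'text': 'How is the virus spreading?', 'answer': 'Mainly through close contact.'}]
```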
@@ -144,11 +142,18 @@ document_store.write_documents(docs_to_index)
 ```
 
 ### Ask questions
-Initialize a Finder (this time without a reader) and ask questions
+Initialize a Pipeline (this time without a reader) and ask questions
 
 
 ```python
-finder = Finder(reader=None, retriever=retriever)
-prediction = finder.get_answers_via_similar_questions(question="How is the virus spreading?", top_k_retriever=10)
-print_answers(prediction, details="all")
+from haystack.pipeline import FAQPipeline
+pipe = FAQPipeline(retriever=retriever)
+```
+
+
+```python
+prediction = pipe.run(query="How is the virus spreading?", top_k_retriever=10)
+print_answers(prediction, details="all")
+
+
 ```
@@ -38,8 +38,6 @@ You can start Elasticsearch on your local machine instance using Docker. If Dock
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 
@@ -177,7 +175,6 @@ print("Reader F1-Score:", reader_eval_results["f1"])
 
 ```python
 # Evaluate combination of Reader and Retriever through Finder
-# Evaluate combination of Reader and Retriever through Finder
 finder_eval_results = finder.eval(top_k_retriever=1, top_k_reader=10, label_index=label_index, doc_index=doc_index)
 finder.print_eval_results(finder_eval_results)
 ```
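Note that Tutorial 5's combined evaluation stays on the old `Finder` API in this commit; only the duplicated comment line is dropped. For context, the retriever-only evaluation that precedes this cell looks roughly like the sketch below (parameter names mirrored from the `finder.eval` call above; treat the exact signature and result keys as assumptions):

```python
# Sketch: component-level retriever evaluation from the same tutorial.
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index)
print("Retriever Recall:", retriever_eval_results["recall"])
```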
@@ -80,8 +80,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 

@@ -179,33 +177,31 @@ Here we use a FARMReader with the *deepset/roberta-base-squad2* model (see: http
 reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
 ```
 
-#### Finder
+### Pipeline
 
-The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
+With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.
+Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.
+To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.
+You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd).
 
 
 ```python
-finder = Finder(reader, retriever)
+from haystack.pipeline import ExtractiveQAPipeline
+pipe = ExtractiveQAPipeline(reader, retriever)
 ```
 
-### Voilà! Ask a question!
+## Voilà! Ask a question!
 
 
 ```python
 # You can configure how many candidates the reader and retriever shall return
 # The higher top_k_retriever, the better (but also the slower) your answers.
-prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_retriever=10, top_k_reader=5)
-
-#prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
-#prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_retriever=10, top_k_reader=5)
+prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_retriever=10, top_k_reader=5)
 ```
 
 
 ```python
 print_answers(prediction, details="minimal")
-```
-
-
-```python
 
 ```
@@ -37,8 +37,6 @@ Here are the packages and imports that we'll need:
 ```python
 !pip install git+https://github.com/deepset-ai/haystack.git
 !pip install urllib3==1.25.4
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-
 ```
 
 
@@ -188,3 +186,14 @@ for question in QUESTIONS:
     answers = predicted_result["answers"]
     print(f'Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
 ```
+
+
+```python
+# Or alternatively use the Pipeline class
+from haystack.pipeline import GenerativeQAPipeline
+
+pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
+for question in QUESTIONS:
+    res = pipe.run(query=question, top_k_generator=1, top_k_retriever=5)
+    print(res)
+```
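To pull just the generated strings out of the result rather than printing the whole dict, the same loop can unpack the `answers` list; a sketch that assumes the `predicted_result["answers"][0]["answer"]` shape used in the hunk above:

```python
# Sketch: print only the generated answer text per question.
for question in QUESTIONS:
    res = pipe.run(query=question, top_k_generator=1, top_k_retriever=5)
    for answer in res.get("answers", []):
        print(f"{question} -> {answer.get('answer')}")
```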
@@ -36,9 +36,8 @@ This tutorial will show you all the tools that Haystack provides to help you cas
 
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
-!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.02.tar.gz
-!tar -xvf xpdf-tools-linux-4.02.tar.gz && sudo cp xpdf-tools-linux-4.02/bin64/pdftotext /usr/local/bin
+!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz
+!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin
 ```
 
 
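Tutorial 8's PDF conversion shells out to the `pdftotext` binary installed above, so a quick check that the install actually landed on `PATH` can save a confusing failure later (a sketch, not part of the commit):

```python
# Sketch: verify the xpdf pdftotext binary is reachable before converting PDFs.
import shutil

if shutil.which("pdftotext") is None:
    raise RuntimeError("pdftotext not found - rerun the xpdf-tools install above")
```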
@@ -21,7 +21,6 @@ This tutorial will guide you through the steps required to create a retriever th
 
 # Install the latest master of Haystack
 !pip install git+https://github.com/deepset-ai/haystack.git
-!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
 ```
 
 
@@ -434,7 +434,7 @@ class FAQPipeline(BaseStandardPipeline):
 
         results: Dict = {"query": query, "answers": []}
         for doc in documents:
-            # TODO proper calibratation of pseudo probabilities
+            # TODO proper calibration of pseudo probabilities
             cur_answer = {
                 "query": doc.text,
                 "answer": doc.meta["answer"],

@@ -448,7 +448,6 @@ class FAQPipeline(BaseStandardPipeline):
             }
-
             results["answers"].append(cur_answer)
 
         return results
 
 
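The `# TODO proper calibration of pseudo probabilities` kept by this hunk flags that the retriever's raw similarity score is not a probability. One entirely illustrative squashing, assuming a cosine-style score in [-1, 1], would be:

```python
# Sketch only: map a cosine-like similarity score onto [0, 1] as a pseudo
# probability. An assumption for illustration, not Haystack's actual method.
def pseudo_probability(score: float) -> float:
    return (score + 1.0) / 2.0
```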
@@ -98,7 +98,7 @@ class HaystackDocumentStore:
         if not es.ping():
             logging.info("Starting Elasticsearch ...")
             status = subprocess.run(
-                ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True
+                ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True
             )
             if status.returncode:
                 raise Exception(

@@ -6,7 +6,7 @@
 #
 # To use GPU with Docker, ensure nvidia-docker(https://github.com/NVIDIA/nvidia-docker) is installed.
 
-docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.1
+docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2
 # alternative: for a demo you can also use this elasticsearch image with already indexed GoT articles
 #docker run -d -p 9200:9200 -e "discovery.type=single-node" deepset/elasticsearch-game-of-thrones
 
@@ -60,8 +60,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {

@@ -70,7 +69,6 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from haystack import Finder\n",
 "from haystack.preprocessor.cleaning import clean_wiki_text\n",
 "from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http\n",
 "from haystack.reader.farm import FARMReader\n",
@@ -111,7 +109,7 @@
 ],
 "source": [
 "# Recommended: Start Elasticsearch using Docker\n",
-"#! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2"
+"#! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.9.2"
 ]
 },
 {

@@ -121,13 +119,13 @@
 "outputs": [],
 "source": [
 "# In Colab / No Docker environments: Start Elasticsearch from source\n",
-"! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
-"! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
-"! chown -R daemon:daemon elasticsearch-7.6.2\n",
+"! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n",
+"! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n",
+"! chown -R daemon:daemon elasticsearch-7.9.2\n",
 "\n",
 "import os\n",
 "from subprocess import Popen, PIPE, STDOUT\n",
-"es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
+"es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],\n",
 "                   stdout=PIPE, stderr=STDOUT,\n",
 "                   preexec_fn=lambda: os.setuid(1)  # as daemon\n",
 "                  )\n",
@@ -359,9 +357,12 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Finder\n",
+"### Pipeline\n",
 "\n",
-"The Finder sticks together reader and retriever in a pipeline to answer our actual questions. "
+"With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.\n",
+"Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.\n",
+"To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.\n",
+"You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd)."
 ]
 },
 {

@@ -374,7 +375,8 @@
 },
 "outputs": [],
 "source": [
-"finder = Finder(reader, retriever)"
+"from haystack.pipeline import ExtractiveQAPipeline\n",
+"pipe = ExtractiveQAPipeline(reader, retriever)"
 ]
 },
 {

@@ -406,7 +408,7 @@
 "source": [
 "# You can configure how many candidates the reader and retriever shall return\n",
 "# The higher top_k_retriever, the better (but also the slower) your answers. \n",
-"prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)"
+"prediction = pipe.run(query=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)"
 ]
 },
 {

@@ -415,8 +417,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_reader=5)\n",
-"# prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_reader=5)"
+"# prediction = pipe.run(query=\"Who created the Dothraki vocabulary?\", top_k_reader=5)\n",
+"# prediction = pipe.run(query=\"Who is the sister of Sansa?\", top_k_reader=5)"
 ]
 },
 {
@@ -48,7 +48,7 @@ def tutorial1_basic_qa_pipeline():
     if LAUNCH_ELASTICSEARCH:
         logging.info("Starting Elasticsearch ...")
         status = subprocess.run(
-            ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True
+            ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2'], shell=True
         )
         if status.returncode:
             raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance"

@@ -138,20 +138,20 @@ def tutorial1_basic_qa_pipeline():
     # reader = TransformersReader(
     #    model_name_or_path="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
 
-    # ### Finder
+    # ### Pipeline
     #
-    # The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
+    # With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.
+    # Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.
+    # To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.
+    # You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd).
+    from haystack.pipeline import ExtractiveQAPipeline
+    pipe = ExtractiveQAPipeline(reader, retriever)
 
+    ## Voilà! Ask a question!
+    prediction = pipe.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
 
-    finder = Finder(reader, retriever)
-
-    # ## Voilà! Ask a question!
-    # You can configure how many candidates the reader and retriever shall return
-    # The higher top_k_retriever, the better (but also the slower) your answers.
-    prediction = finder.get_answers(question="Who is the father of Sansa Stark?", top_k_retriever=10, top_k_reader=5)
-
-    # prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
-    # prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)
+    # prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_reader=5)
+    # prediction = pipe.run(query="Who is the sister of Sansa?", top_k_reader=5)
 
     print_answers(prediction, details="minimal")
@@ -56,8 +56,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {

@@ -56,8 +56,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {
@@ -278,9 +277,12 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Finder\n",
+"### Pipeline\n",
 "\n",
-"The Finder sticks together reader and retriever in a pipeline to answer our actual questions. "
+"With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.\n",
+"Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.\n",
+"To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.\n",
+"You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd)."
 ]
 },
 {

@@ -293,7 +295,8 @@
 },
 "outputs": [],
 "source": [
-"finder = Finder(reader, retriever)"
+"from haystack.pipeline import ExtractiveQAPipeline\n",
+"pipe = ExtractiveQAPipeline(reader, retriever)"
 ]
 },
 {

@@ -340,8 +343,8 @@
 ],
 "source": [
 "# You can configure how many candidates the reader and retriever shall return\n",
-"# The higher top_k_retriever, the better (but also the slower) your answers. \n",
-"prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)"
+"# The higher top_k_retriever, the better (but also the slower) your answers.\n",
+"prediction = pipe.run(query=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)"
 ]
 },
 {

@@ -350,8 +353,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_reader=5)\n",
-"# prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_reader=5)"
+"# prediction = pipe.run(query=\"Who created the Dothraki vocabulary?\", top_k_reader=5)\n",
+"# prediction = pipe.run(query=\"Who is the sister of Sansa?\", top_k_reader=5)"
 ]
 },
 {
@@ -91,18 +91,20 @@ def tutorial3_basic_qa_pipeline_without_elasticsearch():
     # Alternative:
     # reader = TransformersReader(model_name_or_path="distilbert-base-uncased-distilled-squad", tokenizer="distilbert-base-uncased", use_gpu=-1)
 
-    # ### Finder
+    # ### Pipeline
     #
-    # The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
-    finder = Finder(reader, retriever)
+    # With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.
+    # Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.
+    # To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.
+    # You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd).
+    from haystack.pipeline import ExtractiveQAPipeline
+    pipe = ExtractiveQAPipeline(reader, retriever)
 
-    # ## Voilà! Ask a question!
-    # You can configure how many candidates the reader and retriever shall return
-    # The higher top_k_retriever, the better (but also the slower) your answers.
-    prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
-    # prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
-    # prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)
+    ## Voilà! Ask a question!
+    prediction = pipe.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
 
+    # prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_reader=5)
+    # prediction = pipe.run(query="Who is the sister of Sansa?", top_k_reader=5)
 
     print_answers(prediction, details="minimal")
 
@@ -64,8 +64,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {

@@ -221,7 +220,7 @@
 "# Get embeddings for our questions from the FAQs\n",
 "questions = list(df[\"question\"].values)\n",
 "df[\"question_emb\"] = retriever.embed_queries(texts=questions)\n",
-"df = df.rename(columns={\"answer\": \"text\"})\n",
+"df = df.rename(columns={\"question\": \"text\"})\n",
 "\n",
 "# Convert Dataframe to list of dicts and index them in our DocumentStore\n",
 "docs_to_index = df.to_dict(orient=\"records\")\n",

@@ -238,7 +237,7 @@
 "cell_type": "markdown",
 "source": [
 "### Ask questions\n",
-"Initialize a Finder (this time without a reader) and ask questions"
+"Initialize a Pipeline (this time without a reader) and ask questions"
 ],
 "metadata": {
 "collapsed": false

@@ -249,9 +248,8 @@
 "execution_count": null,
 "outputs": [],
 "source": [
-"finder = Finder(reader=None, retriever=retriever)\n",
-"prediction = finder.get_answers_via_similar_questions(question=\"How is the virus spreading?\", top_k_retriever=10)\n",
-"print_answers(prediction, details=\"all\")"
+"from haystack.pipeline import FAQPipeline\n",
+"pipe = FAQPipeline(retriever=retriever)"
 ],
 "metadata": {
 "collapsed": false,

@@ -259,6 +257,22 @@
 "name": "#%%\n"
 }
 }
+},
+{
+"cell_type": "code",
+"source": [
+"prediction = pipe.run(query=\"How is the virus spreading?\", top_k_retriever=10)\n",
+"print_answers(prediction, details=\"all\")\n",
+"\n"
+],
+"metadata": {
+"collapsed": false,
+"pycharm": {
+"name": "#%%\n"
+}
+},
+"execution_count": null,
+"outputs": []
 }
 ],
 "metadata": {
@@ -24,7 +24,7 @@ def tutorial4_faq_style_qa():
     # - Generalizability: We can only answer questions that are similar to existing ones in FAQ
     #
     # In some use cases, a combination of extractive QA and FAQ-style can also be an interesting option.
-    LAUNCH_ELASTICSEARCH=True
+    LAUNCH_ELASTICSEARCH=False
 
     if LAUNCH_ELASTICSEARCH:
         logging.info("Starting Elasticsearch ...")

@@ -34,7 +34,7 @@ def tutorial4_faq_style_qa():
         if status.returncode:
             raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance"
                             "then set LAUNCH_ELASTICSEARCH in the script to False.")
-        time.sleep(15)
+        time.sleep(30)
 
     ### Init the DocumentStore
     # In contrast to Tutorial 1 (extractive QA), we:

@@ -71,16 +71,18 @@ def tutorial4_faq_style_qa():
     # Get embeddings for our questions from the FAQs
     questions = list(df["question"].values)
     df["question_emb"] = retriever.embed_queries(texts=questions)
-    df = df.rename(columns={"answer": "text"})
+    df = df.rename(columns={"question": "text"})
 
     # Convert Dataframe to list of dicts and index them in our DocumentStore
     docs_to_index = df.to_dict(orient="records")
     document_store.write_documents(docs_to_index)
 
+    # Initialize a Pipeline (this time without a reader) and ask questions
 
-    # Init reader & and use Finder to get answer (same as in Tutorial 1)
-    finder = Finder(reader=None, retriever=retriever)
-    prediction = finder.get_answers_via_similar_questions(question="How is the virus spreading?", top_k_retriever=10)
+    from haystack.pipeline import FAQPipeline
+    pipe = FAQPipeline(retriever=retriever)
+
+    prediction = pipe.run(query="How is the virus spreading?", top_k_retriever=10)
     print_answers(prediction, details="all")
 
 
@@ -77,8 +77,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {

@@ -469,7 +468,6 @@
 },
 "outputs": [],
 "source": [
-"# Evaluate combination of Reader and Retriever through Finder\n",
 "# Evaluate combination of Reader and Retriever through Finder\n",
 "finder_eval_results = finder.eval(top_k_retriever=1, top_k_reader=10, label_index=label_index, doc_index=doc_index)\n",
 "finder.print_eval_results(finder_eval_results)"
@@ -286,8 +286,7 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ]
 },
 {

@@ -647,9 +646,12 @@
 "id": "unhLD18yA6OF"
 },
 "source": [
-"#### Finder\n",
+"### Pipeline\n",
 "\n",
-"The Finder sticks together reader and retriever in a pipeline to answer our actual questions. "
+"With a Haystack `Pipeline` you can stick together your building blocks into a search pipeline.\n",
+"Under the hood, `Pipelines` are Directed Acyclic Graphs (DAGs) that you can easily customize for your own use cases.\n",
+"To speed things up, Haystack also comes with a few predefined Pipelines. One of them is the `ExtractiveQAPipeline` that combines a retriever and a reader to answer our questions.\n",
+"You can learn more about `Pipelines` in the [docs](https://haystack.deepset.ai/docs/latest/pipelinesmd)."
 ]
 },
 {
@@ -662,7 +664,8 @@
 },
 "outputs": [],
 "source": [
-"finder = Finder(reader, retriever)"
+"from haystack.pipeline import ExtractiveQAPipeline\n",
+"pipe = ExtractiveQAPipeline(reader, retriever)"
 ]
 },
 {

@@ -672,7 +675,7 @@
 "id": "bXlBBxKXA6OL"
 },
 "source": [
-"### Voilà! Ask a question!"
+"## Voilà! Ask a question!"
 ]
 },
 {

@@ -712,65 +715,8 @@
 ],
 "source": [
 "# You can configure how many candidates the reader and retriever shall return\n",
-"# The higher top_k_retriever, the better (but also the slower) your answers. \n",
-"prediction = finder.get_answers(question=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)\n",
-"\n",
-"#prediction = finder.get_answers(question=\"Who is the father of Arya Stark?\", top_k_retriever=10, top_k_reader=5)\n",
-"#prediction = finder.get_answers(question=\"Who is the sister of Sansa?\", top_k_retriever=10, top_k_reader=5)"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 9,
-"metadata": {
-"colab": {
-"base_uri": "https://localhost:8080/",
-"height": 561
-},
-"colab_type": "code",
-"id": "N70FgfkwA6OQ",
-"outputId": "9419c75d-181c-4ef6-cea8-b328a503f19a",
-"pycharm": {
-"name": "#%%\n"
-}
-},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"[ { 'answer': 'David J. Peterson',\n",
-" 'context': '\\n'\n",
-" '===Valyrian===\\n'\n",
-" 'David J. Peterson, who created the Dothraki language for '\n",
-" 'the first season of the show, was entrusted by the '\n",
-" 'producers to design a new '},\n",
-" { 'answer': 'David Peterson',\n",
-" 'context': '\\n'\n",
-" '==Phonology and romanization==\\n'\n",
-" 'David Peterson has said, \"You know, most people probably '\n",
-" \"don't really know what Arabic actually sounds like, so to \"\n",
-" 'an '},\n",
-" { 'answer': 'books',\n",
-" 'context': 'ints. First, the language had to match the uses already '\n",
-" 'put down in the books. Secondly, it had to be easily '\n",
-" 'pronounceable or learnable by the actors'},\n",
-" { 'answer': \"'''Nevakhi vekha ha maan: Rekke, m'aresakea norethi fitte.'\",\n",
-" 'context': '\\n'\n",
-" '==Sample==\\n'\n",
-" \": '''Nevakhi vekha ha maan: Rekke, m'aresakea norethi \"\n",
-" \"fitte.'''\\n\"\n",
-" ': seat. exist. for there. with.coward. hair. short\\n'\n",
-" \": ''There is a place f\"},\n",
-" { 'answer': 'Tyrion',\n",
-" 'context': 'ding, as well as his nephew Joffrey, the new king, as '\n",
-" 'civil war begins. Tyrion struggles to strengthen and '\n",
-" 'protect the city and family who hate him an'}]\n"
-]
-}
-],
-"source": [
-"print_answers(prediction, details=\"minimal\")"
+"# The higher top_k_retriever, the better (but also the slower) your answers.\n",
+"prediction = pipe.run(query=\"Who created the Dothraki vocabulary?\", top_k_retriever=10, top_k_reader=5)"
 ]
 },
 {
@@ -778,7 +724,10 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
-"source": []
+"source": [
+"print_answers(prediction, details=\"minimal\")\n",
+"\n"
+]
 }
 ],
 "metadata": {
@@ -52,18 +52,15 @@ def tutorial6_better_retrieval_via_dpr():
     # Hugging Face's model hub (https://huggingface.co/models)
     reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)
 
-    ### Finder
-    # The Finder sticks together reader and retriever in a pipeline to answer our actual questions.
-    finder = Finder(reader, retriever)
+    ### Pipeline
+    from haystack.pipeline import ExtractiveQAPipeline
+    pipe = ExtractiveQAPipeline(reader, retriever)
 
-    ### Voilà! Ask a question!
-    # You can configure how many candidates the reader and retriever shall return
-    # The higher top_k_retriever, the better (but also the slower) your answers.
-    prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
+    ## Voilà! Ask a question!
+    prediction = pipe.run(query="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
 
-    # prediction = finder.get_answers(question="Who created the Dothraki vocabulary?", top_k_reader=5)
-    # prediction = finder.get_answers(question="Who is the sister of Sansa?", top_k_reader=5)
+    # prediction = pipe.run(query="Who created the Dothraki vocabulary?", top_k_reader=5)
+    # prediction = pipe.run(query="Who is the sister of Sansa?", top_k_reader=5)
 
     print_answers(prediction, details="minimal")
 
@@ -62,8 +62,7 @@
 "outputs": [],
 "source": [
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install urllib3==1.25.4\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n"
+"!pip install urllib3==1.25.4"
 ],
 "metadata": {
 "collapsed": false,

@@ -322,6 +321,26 @@
 "name": "#%%\n"
 }
 }
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"outputs": [],
+"source": [
+"# Or alternatively use the Pipeline class\n",
+"from haystack.pipeline import GenerativeQAPipeline\n",
+"\n",
+"pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)\n",
+"for question in QUESTIONS:\n",
+"    res = pipe.run(query=question, top_k_generator=1, top_k_retriever=5)\n",
+"    print(res)"
+],
+"metadata": {
+"collapsed": false,
+"pycharm": {
+"name": "#%%\n"
+}
+}
 }
 ],
 "metadata": {
@@ -111,6 +111,12 @@ def tutorial7_rag_generator():
         answers = predicted_result["answers"]
         print(f'Generated answer is \'{answers[0]["answer"]}\' for the question = \'{question}\'')
 
+    # Or alternatively use the Pipeline class
+    from haystack.pipeline import GenerativeQAPipeline
+    pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
+    for question in QUESTIONS:
+        res = pipe.run(query=question, top_k_generator=1, top_k_retriever=5)
+        print(res)
 
 if __name__ == "__main__":
     tutorial7_rag_generator()
@@ -59,9 +59,8 @@
 "\n",
 "# Install the latest master of Haystack\n",
 "!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n",
-"!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.02.tar.gz\n",
-"!tar -xvf xpdf-tools-linux-4.02.tar.gz && sudo cp xpdf-tools-linux-4.02/bin64/pdftotext /usr/local/bin"
+"!wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.03.tar.gz\n",
+"!tar -xvf xpdf-tools-linux-4.03.tar.gz && sudo cp xpdf-tools-linux-4.03/bin64/pdftotext /usr/local/bin"
 ],
 "metadata": {
 "collapsed": false,

@@ -497,15 +496,6 @@
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython2",
 "version": "2.7.6"
-},
-"pycharm": {
-"stem_cell": {
-"cell_type": "raw",
-"source": [],
-"metadata": {
-"collapsed": false
-}
-}
 }
 },
 "nbformat": 4,
@@ -26,8 +26,7 @@
 "#! pip install farm-haystack\n",
 "\n",
 "# Install the latest master of Haystack\n",
-"!pip install git+https://github.com/deepset-ai/haystack.git\n",
-"!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html"
+"!pip install git+https://github.com/deepset-ai/haystack.git"
 ],
 "metadata": {
 "collapsed": false,

@@ -403,15 +402,6 @@
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython2",
 "version": "2.7.6"
-},
-"pycharm": {
-"stem_cell": {
-"cell_type": "raw",
-"source": [],
-"metadata": {
-"collapsed": false
-}
-}
 }
 },
 "nbformat": 4,