updated tutorials (#1359)

This commit is contained in:
Markus Paff 2021-08-19 21:16:56 +02:00 committed by GitHub
parent a3c746abf5
commit ff2049cd45
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 10 additions and 14 deletions

View File

@ -133,7 +133,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
# 'meta': {'name': "<DOCUMENT_NAME_HERE>", ...} # 'meta': {'name': "<DOCUMENT_NAME_HERE>", ...}
#} #}
# (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and # (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and
# can be accessed later for filtering or shown in the responses of the Finder) # can be accessed later for filtering or shown in the responses of the Pipeline)
# Let's have a look at the first 3 entries: # Let's have a look at the first 3 entries:
print(dicts[:3]) print(dicts[:3])
@ -142,7 +142,7 @@ print(dicts[:3])
document_store.write_documents(dicts) document_store.write_documents(dicts)
``` ```
## Initialize Retriever, Reader, & Finder ## Initialize Retriever, Reader, & Pipeline
### Retriever ### Retriever

View File

@ -34,7 +34,7 @@ These lines are to install Haystack through pip
```python ```python
# Install the latest release of Haystack in your own environment # Install the latest release of Haystack in your own environment
#! pip install farm-haystack !pip install farm-haystack
# Install the latest master of Haystack # Install the latest master of Haystack
!pip install grpcio-tools==1.34.1 !pip install grpcio-tools==1.34.1

View File

@ -72,9 +72,9 @@ Then change the `use_gpu` arguments below to `True`
```python ```python
reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=True) reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=True)
train_data = "data/squad20" data_dir = "data/squad20"
# train_data = "PATH/TO_YOUR/TRAIN_DATA" # data_dir = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model") reader.train(data_dir=data_dir, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model")
``` ```

View File

@ -44,7 +44,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
```python ```python
from haystack import Finder
from haystack.preprocessor.cleaning import clean_wiki_text from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
from haystack.reader.farm import FARMReader from haystack.reader.farm import FARMReader
@ -102,7 +101,7 @@ print(dicts[:3])
document_store.write_documents(dicts) document_store.write_documents(dicts)
``` ```
## Initialize Retriever, Reader, & Finder ## Initialize Retriever, Reader & Pipeline
### Retriever ### Retriever

View File

@ -52,7 +52,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
```python ```python
from haystack import Finder
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.retriever.dense import EmbeddingRetriever from haystack.retriever.dense import EmbeddingRetriever

View File

@ -148,7 +148,7 @@ retriever = ElasticsearchRetriever(document_store=document_store)
# Initialize Reader # Initialize Reader
from haystack.reader.farm import FARMReader from haystack.reader.farm import FARMReader
reader = FARMReader("deepset/roberta-base-squad2", top_k_per_candidate=4, return_no_answer=True) reader = FARMReader("deepset/roberta-base-squad2", top_k=4, return_no_answer=True)
``` ```

View File

@ -85,7 +85,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
```python ```python
from haystack import Finder
from haystack.preprocessor.cleaning import clean_wiki_text from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
from haystack.reader.farm import FARMReader from haystack.reader.farm import FARMReader
@ -146,7 +145,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
document_store.write_documents(dicts) document_store.write_documents(dicts)
``` ```
### Initialize Retriever, Reader, & Finder ### Initialize Retriever, Reader & Pipeline
#### Retriever #### Retriever

View File

@ -190,8 +190,7 @@ preprocessor = PreProcessor(
split_length=100, split_length=100,
split_respect_sentence_boundary=True split_respect_sentence_boundary=True
) )
nested_docs = [preprocessor.process(d) for d in all_docs] docs = preprocessor.process(all_docs)
docs = [d for x in nested_docs for d in x]
print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}") print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
``` ```