updated tutorials (#1359)

2025-08-27 18:06:17 +00:00 · 2021-08-19 21:16:56 +02:00 · 2021-08-19 21:16:56 +02:00 · ff2049cd45
commit ff2049cd45
parent a3c746abf5
8 changed files with 10 additions and 14 deletions
--- a/docs/_src/tutorials/tutorials/1.md
+++ b/docs/_src/tutorials/tutorials/1.md
@ -133,7 +133,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
 #    'meta': {'name': "<DOCUMENT_NAME_HERE>", ...}
 #}
 # (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and
-# can be accessed later for filtering or shown in the responses of the Finder)
+# can be accessed later for filtering or shown in the responses of the Pipeline)
 # Let's have a look at the first 3 entries:
 print(dicts[:3])
@ -142,7 +142,7 @@ print(dicts[:3])
 document_store.write_documents(dicts)
 ```
-## Initalize Retriever, Reader,  & Finder
+## Initialize Retriever, Reader & Pipeline
 ### Retriever
--- a/docs/_src/tutorials/tutorials/11.md
+++ b/docs/_src/tutorials/tutorials/11.md
@ -34,7 +34,7 @@ These lines are to install Haystack through pip
 ```python
 # Install the latest release of Haystack in your own environment
-#! pip install farm-haystack
+!pip install farm-haystack
 # Install the latest master of Haystack
 !pip install grpcio-tools==1.34.1
--- a/docs/_src/tutorials/tutorials/2.md
+++ b/docs/_src/tutorials/tutorials/2.md
@ -72,9 +72,9 @@ Then change the `use_gpu` arguments below to `True`
 ```python
 reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=True)
-train_data = "data/squad20"
+data_dir = "data/squad20"
-# train_data = "PATH/TO_YOUR/TRAIN_DATA" 
+# data_dir = "PATH/TO_YOUR/TRAIN_DATA" 
-reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model")
+reader.train(data_dir=data_dir, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model")
 ```
--- a/docs/_src/tutorials/tutorials/3.md
+++ b/docs/_src/tutorials/tutorials/3.md
@ -44,7 +44,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 ```python
-from haystack import Finder
 from haystack.preprocessor.cleaning import clean_wiki_text
 from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
 from haystack.reader.farm import FARMReader
@ -102,7 +101,7 @@ print(dicts[:3])
 document_store.write_documents(dicts)
 ```
-## Initalize Retriever, Reader,  & Finder
+## Initialize Retriever, Reader & Pipeline
 ### Retriever
--- a/docs/_src/tutorials/tutorials/4.md
+++ b/docs/_src/tutorials/tutorials/4.md
@ -52,7 +52,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 ```python
-from haystack import Finder
 from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
 from haystack.retriever.dense import EmbeddingRetriever
--- a/docs/_src/tutorials/tutorials/5.md
+++ b/docs/_src/tutorials/tutorials/5.md
@ -148,7 +148,7 @@ retriever = ElasticsearchRetriever(document_store=document_store)
 # Initialize Reader
 from haystack.reader.farm import FARMReader
-reader = FARMReader("deepset/roberta-base-squad2", top_k_per_candidate=4, return_no_answer=True)
+reader = FARMReader("deepset/roberta-base-squad2", top_k=4, return_no_answer=True)
 ```
--- a/docs/_src/tutorials/tutorials/6.md
+++ b/docs/_src/tutorials/tutorials/6.md
@ -85,7 +85,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
 ```python
-from haystack import Finder
 from haystack.preprocessor.cleaning import clean_wiki_text
 from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
 from haystack.reader.farm import FARMReader
@ -146,7 +145,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
 document_store.write_documents(dicts)
 ```
-### Initalize Retriever, Reader,  & Finder
+### Initialize Retriever, Reader & Pipeline
 #### Retriever
--- a/docs/_src/tutorials/tutorials/8.md
+++ b/docs/_src/tutorials/tutorials/8.md
@ -190,8 +190,7 @@ preprocessor = PreProcessor(
    split_length=100,
    split_respect_sentence_boundary=True
 )
-nested_docs = [preprocessor.process(d) for d in all_docs]
+docs = preprocessor.process(all_docs)
-docs = [d for x in nested_docs for d in x]
 print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
 ```