mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-27 18:06:17 +00:00
updated tutorials (#1359)
This commit is contained in:
parent
a3c746abf5
commit
ff2049cd45
@ -133,7 +133,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
|
|||||||
# 'meta': {'name': "<DOCUMENT_NAME_HERE>", ...}
|
# 'meta': {'name': "<DOCUMENT_NAME_HERE>", ...}
|
||||||
#}
|
#}
|
||||||
# (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and
|
# (Optionally: you can also add more key-value-pairs here, that will be indexed as fields in Elasticsearch and
|
||||||
# can be accessed later for filtering or shown in the responses of the Finder)
|
# can be accessed later for filtering or shown in the responses of the Pipeline)
|
||||||
|
|
||||||
# Let's have a look at the first 3 entries:
|
# Let's have a look at the first 3 entries:
|
||||||
print(dicts[:3])
|
print(dicts[:3])
|
||||||
@ -142,7 +142,7 @@ print(dicts[:3])
|
|||||||
document_store.write_documents(dicts)
|
document_store.write_documents(dicts)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Initalize Retriever, Reader, & Finder
|
## Initialize Retriever, Reader, & Pipeline
|
||||||
|
|
||||||
### Retriever
|
### Retriever
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ These lines are to install Haystack through pip
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Install the latest release of Haystack in your own environment
|
# Install the latest release of Haystack in your own environment
|
||||||
#! pip install farm-haystack
|
!pip install farm-haystack
|
||||||
|
|
||||||
# Install the latest master of Haystack
|
# Install the latest master of Haystack
|
||||||
!pip install grpcio-tools==1.34.1
|
!pip install grpcio-tools==1.34.1
|
||||||
|
@ -72,9 +72,9 @@ Then change the `use_gpu` arguments below to `True`
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=True)
|
reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=True)
|
||||||
train_data = "data/squad20"
|
data_dir = "data/squad20"
|
||||||
# train_data = "PATH/TO_YOUR/TRAIN_DATA"
|
# data_dir = "PATH/TO_YOUR/TRAIN_DATA"
|
||||||
reader.train(data_dir=train_data, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model")
|
reader.train(data_dir=data_dir, train_filename="dev-v2.0.json", use_gpu=True, n_epochs=1, save_dir="my_model")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +44,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
|
|||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from haystack import Finder
|
|
||||||
from haystack.preprocessor.cleaning import clean_wiki_text
|
from haystack.preprocessor.cleaning import clean_wiki_text
|
||||||
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
|
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
|
||||||
from haystack.reader.farm import FARMReader
|
from haystack.reader.farm import FARMReader
|
||||||
@ -102,7 +101,7 @@ print(dicts[:3])
|
|||||||
document_store.write_documents(dicts)
|
document_store.write_documents(dicts)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Initalize Retriever, Reader, & Finder
|
## Initialize Retriever, Reader & Pipeline
|
||||||
|
|
||||||
### Retriever
|
### Retriever
|
||||||
|
|
||||||
|
@ -52,7 +52,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
|
|||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from haystack import Finder
|
|
||||||
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
|
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
|
||||||
|
|
||||||
from haystack.retriever.dense import EmbeddingRetriever
|
from haystack.retriever.dense import EmbeddingRetriever
|
||||||
|
@ -148,7 +148,7 @@ retriever = ElasticsearchRetriever(document_store=document_store)
|
|||||||
# Initialize Reader
|
# Initialize Reader
|
||||||
from haystack.reader.farm import FARMReader
|
from haystack.reader.farm import FARMReader
|
||||||
|
|
||||||
reader = FARMReader("deepset/roberta-base-squad2", top_k_per_candidate=4, return_no_answer=True)
|
reader = FARMReader("deepset/roberta-base-squad2", top_k=4, return_no_answer=True)
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -85,7 +85,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
|
|||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from haystack import Finder
|
|
||||||
from haystack.preprocessor.cleaning import clean_wiki_text
|
from haystack.preprocessor.cleaning import clean_wiki_text
|
||||||
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
|
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http
|
||||||
from haystack.reader.farm import FARMReader
|
from haystack.reader.farm import FARMReader
|
||||||
@ -146,7 +145,7 @@ dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, spl
|
|||||||
document_store.write_documents(dicts)
|
document_store.write_documents(dicts)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Initalize Retriever, Reader, & Finder
|
### Initialize Retriever, Reader & Pipeline
|
||||||
|
|
||||||
#### Retriever
|
#### Retriever
|
||||||
|
|
||||||
|
@ -190,8 +190,7 @@ preprocessor = PreProcessor(
|
|||||||
split_length=100,
|
split_length=100,
|
||||||
split_respect_sentence_boundary=True
|
split_respect_sentence_boundary=True
|
||||||
)
|
)
|
||||||
nested_docs = [preprocessor.process(d) for d in all_docs]
|
docs = preprocessor.process(all_docs)
|
||||||
docs = [d for x in nested_docs for d in x]
|
|
||||||
|
|
||||||
print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
|
print(f"n_files_input: {len(all_docs)}\nn_docs_output: {len(docs)}")
|
||||||
```
|
```
|
||||||
|
Loading…
x
Reference in New Issue
Block a user