# Version string of this pipeline configuration. It is quoted so that it
# always loads as a string rather than being re-typed by the parser.
version: '1.1.0'

# Building blocks that the pipelines in this file refer to by `name`.
# Each entry gives a custom `name`, the class `type`, and optional `params`.
components:  # define all the building-blocks for Pipeline
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      host: localhost
  - name: Retriever
    type: ElasticsearchRetriever
    params:
      document_store: DocumentStore  # params can reference other components defined in the YAML
      top_k: 5
  - name: Reader  # custom-name for the component; helpful for visualization & debugging
    type: FARMReader  # Haystack Class name for the component
    params:
      model_name_or_path: deepset/roberta-base-squad2
      context_window_size: 500
      return_no_answer: true
  - name: TextFileConverter
    type: TextConverter
  - name: PDFFileConverter
    type: PDFToTextConverter
  - name: Preprocessor
    type: PreProcessor
    params:
      split_by: word
      split_length: 1000
  - name: FileTypeClassifier
    type: FileTypeClassifier

# Pipeline graphs. Each node names a component and lists, in `inputs`, the
# node(s) (or the pipeline's root input) whose output it consumes.
pipelines:
  - name: query  # a sample extractive-qa Pipeline
    type: Query
    nodes:
      - name: Retriever
        inputs: [Query]
      - name: Reader
        inputs: [Retriever]
  - name: indexing
    type: Indexing
    nodes:
      - name: FileTypeClassifier
        inputs: [File]
      # The classifier's two outputs are wired to separate converters.
      - name: TextFileConverter
        inputs: [FileTypeClassifier.output_1]
      - name: PDFFileConverter
        inputs: [FileTypeClassifier.output_2]
      # Both converter branches feed the same preprocessor.
      - name: Preprocessor
        inputs: [PDFFileConverter, TextFileConverter]
      - name: Retriever
        inputs: [Preprocessor]
      - name: DocumentStore
        inputs: [Retriever]