mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-30 11:26:17 +00:00

* basic example of document classifier in preprocessing logic
* add batch_size to TransformersDocumentClassifier
* complete tutorial16
* Add latest docstring and tutorial changes
* fix missing batch_size
* add notebook
* test for batch_size use added
* add tutorial 16 to headers.py
* Add latest docstring and tutorial changes
* make DocumentClassifier indexing pipeline ready
* Add latest docstring and tutorial changes
* flexibility improvements for DocumentClassifier in Pipelines
* Add latest docstring and tutorial changes
* fix index time usage
* remove query from documentclassifier tests
* improve classification_field resolving + minor fixes
* Add latest docstring and tutorial changes
* tutorial 16 extended with zero shot and pipelines
* Add latest docstring and tutorial changes
* install graphviz in notebook
* Add latest docstring and tutorial changes
* remove convert_to_dicts
* Add latest docstring and tutorial changes
* Fix typo
* Add latest docstring and tutorial changes
* remove retriever from indexing pipeline
* Add latest docstring and tutorial changes
* fix save_to_yaml when using FileTypeClassifier
* emphasize the impact with zero shot classification
* Add latest docstring and tutorial changes
* adjust use_gpu to boolean in test

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
103 lines
2.4 KiB
YAML
103 lines
2.4 KiB
YAML
# Version marker for this pipeline definition file. It is kept quoted so that
# it is read as the string "0.7" and not as a floating-point number.
version: '0.7'

# Named building blocks. Pipelines further down refer to these entries by
# their `name`; `type` selects the class and `params` holds its arguments.
components:
  - name: Reader
    type: FARMReader
    params:
      no_ans_boost: -10
      model_name_or_path: deepset/roberta-base-squad2
  - name: ESRetriever
    type: ElasticsearchRetriever
    params:
      # Refers to the component named DocumentStore declared in this list.
      document_store: DocumentStore
      custom_query: null
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      index: haystack_test
      label_index: haystack_test_label
  - name: PDFConverter
    type: PDFToTextConverter
    params:
      remove_numeric_tables: false
  # Declared because the indexing_text_pipeline uses a node with this name;
  # without a matching component that pipeline has nothing to resolve against.
  - name: TextConverter
    type: TextConverter
  - name: Preprocessor
    type: PreProcessor
    params:
      clean_whitespace: true
  - name: IndexTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      batch_size: 16
      # NOTE(review): -1 is an integer, not a boolean. If the classifier
      # treats use_gpu as a true/false flag, -1 is truthy; if it treats it as
      # a device index, -1 presumably means CPU. Confirm the intended meaning.
      use_gpu: -1
  - name: QueryTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      # NOTE(review): same integer-vs-boolean question as for
      # IndexTimeDocumentClassifier above; confirm before changing.
      use_gpu: -1

# Pipeline graphs. Each node lists the node(s) it receives input from in
# `inputs`; `Query` and `File` are the entry points used by the graphs below.
pipelines:
  # Query -> ESRetriever -> Reader
  - name: query_pipeline
    type: Pipeline
    nodes:
      - name: ESRetriever
        inputs: [Query]
      - name: Reader
        inputs: [ESRetriever]

  # Same graph as query_pipeline, declared with the RayPipeline type and
  # two replicas requested for the retriever node.
  - name: ray_query_pipeline
    type: RayPipeline
    nodes:
      - name: ESRetriever
        replicas: 2
        inputs: [Query]
      - name: Reader
        inputs: [ESRetriever]

  # Query -> ESRetriever -> QueryTimeDocumentClassifier -> Reader
  - name: query_pipeline_with_document_classifier
    type: Pipeline
    nodes:
      - name: ESRetriever
        inputs: [Query]
      - name: QueryTimeDocumentClassifier
        inputs: [ESRetriever]
      - name: Reader
        inputs: [QueryTimeDocumentClassifier]

  # File -> PDFConverter -> Preprocessor -> ESRetriever -> DocumentStore
  - name: indexing_pipeline
    type: Pipeline
    nodes:
      - name: PDFConverter
        inputs: [File]
      - name: Preprocessor
        inputs: [PDFConverter]
      - name: ESRetriever
        inputs: [Preprocessor]
      - name: DocumentStore
        inputs: [ESRetriever]

  # File -> TextConverter -> Preprocessor -> ESRetriever -> DocumentStore
  - name: indexing_text_pipeline
    type: Pipeline
    nodes:
      - name: TextConverter
        inputs: [File]
      - name: Preprocessor
        inputs: [TextConverter]
      - name: ESRetriever
        inputs: [Preprocessor]
      - name: DocumentStore
        inputs: [ESRetriever]

  # File -> PDFConverter -> Preprocessor -> IndexTimeDocumentClassifier
  #      -> ESRetriever -> DocumentStore
  - name: indexing_pipeline_with_classifier
    type: Pipeline
    nodes:
      - name: PDFConverter
        inputs: [File]
      - name: Preprocessor
        inputs: [PDFConverter]
      - name: IndexTimeDocumentClassifier
        inputs: [Preprocessor]
      - name: ESRetriever
        inputs: [IndexTimeDocumentClassifier]
      - name: DocumentStore
        inputs: [ESRetriever]