haystack/test/samples/pipeline/ray.advanced.haystack-pipeline.yml
Sebastian 59857cb492
feat: Speed up reader tests (#3476)
* Use a smaller reader where possible

* Change scope to module of reader to get faster load times
2022-10-26 19:04:18 +02:00

74 lines
1.9 KiB
YAML

# NOTE(review): presumably tells the pipeline loader to skip its version
# compatibility check for this file — confirm against the loader.
version: ignore
# NOTE(review): presumably declares that this pipeline needs the optional
# Ray integration — confirm against the loader.
extras: ray
# Component definitions. Each entry has a unique `name`, a `type`, and the
# `params` for that type. Pipelines refer to components by `name`; `params`
# values such as `document_store: DocumentStore` name another entry in this
# list.
components:
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      index: haystack_test
      label_index: haystack_test_label

  - name: ESRetriever1
    type: BM25Retriever
    params:
      document_store: DocumentStore

  - name: ESRetriever2
    # TfidfRetriever can't be used until
    # https://github.com/deepset-ai/haystack/pull/2984 is merged, so
    # BM25Retriever is used instead.
    # type: TfidfRetriever
    type: BM25Retriever
    params:
      document_store: DocumentStore

  - name: Reader
    type: FARMReader
    params:
      no_ans_boost: -10
      model_name_or_path: deepset/bert-medium-squad2-distilled
      num_processes: 0

  # The four components below are defined here but are not referenced by the
  # ray_query_pipeline nodes listed in this file.
  - name: PDFConverter
    type: PDFToTextConverter
    params:
      remove_numeric_tables: false

  - name: Preprocessor
    type: PreProcessor
    params:
      clean_whitespace: true

  - name: IndexTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      batch_size: 16
      use_gpu: false

  - name: QueryTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      use_gpu: false

  - name: JoinDocuments
    type: JoinDocuments
    # Explicit empty mapping: a bare `params:` would parse as null.
    params: {}
# Pipeline graph. Both retrievers take the Query input, JoinDocuments takes
# the output of both retrievers, and Reader takes the output of JoinDocuments.
pipelines:
  - name: ray_query_pipeline
    nodes:
      - name: ESRetriever1
        inputs: [Query]
        # NOTE(review): presumably forwarded as keyword arguments to the Ray
        # Serve deployment created for this node — confirm against the loader.
        serve_deployment_kwargs:
          num_replicas: 2
          version: Twenty
          ray_actor_options:
            # num_gpus: 0.25  # we have no GPU to test this
            num_cpus: 0.25
          # NOTE(review): kept as a sibling of ray_actor_options on the
          # assumption that it is a deployment-level option, not an actor
          # resource option — confirm against the Ray Serve version in use.
          max_concurrent_queries: 17

      - name: ESRetriever2
        inputs: [Query]
        # Same settings as ESRetriever1 except for max_concurrent_queries.
        serve_deployment_kwargs:
          num_replicas: 2
          version: Twenty
          ray_actor_options:
            # num_gpus: 0.25  # we have no GPU to test this
            num_cpus: 0.25
          # NOTE(review): kept as a sibling of ray_actor_options on the
          # assumption that it is a deployment-level option, not an actor
          # resource option — confirm against the Ray Serve version in use.
          max_concurrent_queries: 15

      - name: JoinDocuments
        inputs:
          - ESRetriever1
          - ESRetriever2

      - name: Reader
        inputs: [JoinDocuments]