# haystack/test/samples/pipeline/test_pipeline.yaml

# Quoted so the value is read as the string '0.7' rather than the float 0.7.
version: '0.7'

# Component definitions. Each entry has a `name`, a `type` and optional
# `params`; the pipelines in this file refer to components by `name`.
components:
  - name: Reader
    type: FARMReader
    params:
      no_ans_boost: -10
      model_name_or_path: deepset/roberta-base-squad2
  - name: ESRetriever
    type: ElasticsearchRetriever
    params:
      # Refers by name to the DocumentStore component declared below.
      document_store: DocumentStore
      custom_query: null
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      index: haystack_test
      label_index: haystack_test_label
  - name: PDFConverter
    type: PDFToTextConverter
    params:
      remove_numeric_tables: false
  # Referenced by `indexing_text_pipeline`; without this definition that
  # pipeline names a node that does not exist in this file.
  - name: TextConverter
    type: TextConverter
    params:
      remove_numeric_tables: false
  - name: Preprocessor
    type: PreProcessor
    params:
      clean_whitespace: true
  # Two separately named components of the same type, so index-time and
  # query-time pipelines can be configured independently.
  - name: IndexTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      batch_size: 16
      use_gpu: -1
  - name: QueryTimeDocumentClassifier
    type: TransformersDocumentClassifier
    params:
      use_gpu: -1


# Pipeline definitions. Every node `name` must match a component `name`;
# `inputs` lists the upstream node(s) feeding that node.
pipelines:
  # Query-time flow: Query -> ESRetriever -> Reader.
  - name: query_pipeline
    type: Pipeline
    nodes:
      - name: ESRetriever
        inputs:
          - Query
      - name: Reader
        inputs:
          - ESRetriever

  # Query -> ESRetriever -> Reader, declared with type RayPipeline; the
  # ESRetriever node sets `replicas: 2`.
  - name: ray_query_pipeline
    type: RayPipeline
    nodes:
      - name: ESRetriever
        inputs:
          - Query
        replicas: 2
      - name: Reader
        inputs:
          - ESRetriever

  # Query-time flow with a classification step between retrieval and reading:
  # Query -> ESRetriever -> QueryTimeDocumentClassifier -> Reader.
  - name: query_pipeline_with_document_classifier
    type: Pipeline
    nodes:
      - name: ESRetriever
        inputs:
          - Query
      - name: QueryTimeDocumentClassifier
        inputs:
          - ESRetriever
      - name: Reader
        inputs:
          - QueryTimeDocumentClassifier

  # Index-time flow for PDF input:
  # File -> PDFConverter -> Preprocessor -> ESRetriever -> DocumentStore.
  - name: indexing_pipeline
    type: Pipeline
    nodes:
      - name: PDFConverter
        inputs:
          - File
      - name: Preprocessor
        inputs:
          - PDFConverter
      - name: ESRetriever
        inputs:
          - Preprocessor
      - name: DocumentStore
        inputs:
          - ESRetriever

  # Index-time flow for plain-text input:
  # File -> TextConverter -> Preprocessor -> ESRetriever -> DocumentStore.
  # NOTE(review): this pipeline needs a component named TextConverter under
  # `components` - confirm one is declared.
  - name: indexing_text_pipeline
    type: Pipeline
    nodes:
      - name: TextConverter
        inputs:
          - File
      - name: Preprocessor
        inputs:
          - TextConverter
      - name: ESRetriever
        inputs:
          - Preprocessor
      - name: DocumentStore
        inputs:
          - ESRetriever

  # Index-time flow for PDF input with classification before storage:
  # File -> PDFConverter -> Preprocessor -> IndexTimeDocumentClassifier
  #      -> ESRetriever -> DocumentStore.
  - name: indexing_pipeline_with_classifier
    type: Pipeline
    nodes:
      - name: PDFConverter
        inputs:
          - File
      - name: Preprocessor
        inputs:
          - PDFConverter
      - name: IndexTimeDocumentClassifier
        inputs:
          - Preprocessor
      - name: ESRetriever
        inputs:
          - IndexTimeDocumentClassifier
      - name: DocumentStore
        inputs:
          - ESRetriever