# To allow your IDE to autocomplete and validate your YAML pipelines, name them as <name of your choice>.haystack-pipeline.yml

version: ignore

components:    # define all the building blocks for the pipelines below
  - name: DocumentStore
    type: ElasticsearchDocumentStore
    params:
      host: localhost
  - name: Retriever
    type: BM25Retriever
    params:
      document_store: DocumentStore    # params can reference other components defined in the YAML
      top_k: 5
  - name: Reader       # custom name for the component; helpful for visualization & debugging
    type: FARMReader    # Haystack class name for the component
    params:
      model_name_or_path: deepset/roberta-base-squad2
      context_window_size: 500
      return_no_answer: true
  - name: TextFileConverter
    type: TextConverter
  - name: PDFFileConverter
    type: PDFToTextConverter
  - name: Preprocessor
    type: PreProcessor
    params:
      split_by: word
      split_length: 1000
  - name: FileTypeClassifier
    type: FileTypeClassifier

pipelines:
  - name: query    # a sample extractive-qa pipeline: Query -> Retriever -> Reader
    nodes:
      - name: Retriever
        inputs: [Query]
      - name: Reader
        inputs: [Retriever]
  - name: indexing    # File -> FileTypeClassifier -> converter -> Preprocessor -> Retriever -> DocumentStore
    nodes:
      - name: FileTypeClassifier
        inputs: [File]
      # Each classifier output edge feeds a dedicated converter.
      - name: TextFileConverter
        inputs: [FileTypeClassifier.output_1]
      - name: PDFFileConverter
        inputs: [FileTypeClassifier.output_2]
      # Both converter branches merge again in the Preprocessor.
      - name: Preprocessor
        inputs: [PDFFileConverter, TextFileConverter]
      - name: Retriever
        inputs: [Preprocessor]
      - name: DocumentStore
        inputs: [Retriever]