mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	 738e008020
			
		
	
	
		738e008020
		
			
		
	
	
	
	
		
			
			* Add run_batch methods for batch querying * Update Documentation & Code Style * Fix mypy * Update Documentation & Code Style * Fix mypy * Fix linter * Fix tests * Update Documentation & Code Style * Fix tests * Update Documentation & Code Style * Fix mypy * Fix rest api test * Update Documentation & Code Style * Add Doc strings * Update Documentation & Code Style * Add batch_size as attribute to nodes supporting batching * Adapt error messages * Adapt type of filters in retrievers * Revert change about truncation_warning in summarizer * Unify multiple_doc_lists tests * Use smaller models in extractor tests * Add return types to JoinAnswers and RouteDocuments * Adapt return statements in reader's run_batch method * Allow list of filters * Adapt error messages * Update Documentation & Code Style * Fix tests * Fix mypy * Adapt print_questions * Remove disabling warning about too many public methods * Add flag for pylint to disable warning about too many public methods in pipelines/base.py and document_stores/base.py * Add type check * Update Documentation & Code Style * Adapt tutorial 11 * Update Documentation & Code Style * Add query_batch method for DCDocStore * Update Documentation & Code Style Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
		
			
				
	
	
		
			91 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			91 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| 
 | |
| from haystack.nodes.retriever.sparse import BM25Retriever
 | |
| from haystack.nodes.reader import FARMReader
 | |
| from haystack.pipelines import Pipeline
 | |
| 
 | |
| from haystack.nodes.extractor import EntityExtractor, simplify_ner_for_qa
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_extractor(document_store_with_docs):
    """Single-query run through a retriever -> NER -> reader pipeline.

    Checks that the NER metadata attached to the top answer contains the
    expected person ("Carla") and location ("Berlin") entities.
    """
    retriever = BM25Retriever(document_store=document_store_with_docs)
    entity_extractor = EntityExtractor()
    qa_reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", num_processes=0)

    # Wire the three nodes into a linear query pipeline.
    qa_pipeline = Pipeline()
    qa_pipeline.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
    qa_pipeline.add_node(component=entity_extractor, name="NER", inputs=["ESRetriever"])
    qa_pipeline.add_node(component=qa_reader, name="Reader", inputs=["NER"])

    result = qa_pipeline.run(
        query="Who lives in Berlin?", params={"ESRetriever": {"top_k": 1}, "Reader": {"top_k": 1}}
    )

    # `run` returns a flat answer list; the extractor stores its hits in answer meta.
    extracted_words = [entity["word"] for entity in result["answers"][0].meta["entities"]]
    assert "Carla" in extracted_words
    assert "Berlin" in extracted_words
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_extractor_batch_single_query(document_store_with_docs):
    """`run_batch` with a single query string.

    Even for one query, batch results are nested one level deeper than
    `run` output, hence the `[0][0]` indexing below.
    """
    retriever = BM25Retriever(document_store=document_store_with_docs)
    entity_extractor = EntityExtractor()
    qa_reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", num_processes=0)

    # Same three-node layout as the non-batch test.
    qa_pipeline = Pipeline()
    qa_pipeline.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
    qa_pipeline.add_node(component=entity_extractor, name="NER", inputs=["ESRetriever"])
    qa_pipeline.add_node(component=qa_reader, name="Reader", inputs=["NER"])

    result = qa_pipeline.run_batch(
        queries="Who lives in Berlin?", params={"ESRetriever": {"top_k": 1}, "Reader": {"top_k": 1}}
    )

    extracted_words = [entity["word"] for entity in result["answers"][0][0].meta["entities"]]
    assert "Carla" in extracted_words
    assert "Berlin" in extracted_words
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_extractor_batch_multiple_queries(document_store_with_docs):
    """`run_batch` with two queries.

    Verifies that the per-query answer lists keep their order: the first
    query's top answer yields Carla/Berlin, the second Paul/New York.
    """
    retriever = BM25Retriever(document_store=document_store_with_docs)
    entity_extractor = EntityExtractor()
    qa_reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", num_processes=0)

    qa_pipeline = Pipeline()
    qa_pipeline.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
    qa_pipeline.add_node(component=entity_extractor, name="NER", inputs=["ESRetriever"])
    qa_pipeline.add_node(component=qa_reader, name="Reader", inputs=["NER"])

    result = qa_pipeline.run_batch(
        queries=["Who lives in Berlin?", "Who lives in New York?"],
        params={"ESRetriever": {"top_k": 1}, "Reader": {"top_k": 1}},
    )

    # Answers are indexed [query][rank]; take the top answer of each query.
    words_first_query = [entity["word"] for entity in result["answers"][0][0].meta["entities"]]
    words_second_query = [entity["word"] for entity in result["answers"][1][0].meta["entities"]]

    assert "Carla" in words_first_query
    assert "Berlin" in words_first_query
    assert "Paul" in words_second_query
    assert "New York" in words_second_query
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_extractor_output_simplifier(document_store_with_docs):
    """Check `simplify_ner_for_qa` on a full pipeline prediction.

    The simplifier should collapse each answer into a dict of the answer
    string plus the entity words found inside it.
    """
    retriever = BM25Retriever(document_store=document_store_with_docs)
    entity_extractor = EntityExtractor()
    qa_reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", num_processes=0)

    qa_pipeline = Pipeline()
    qa_pipeline.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
    qa_pipeline.add_node(component=entity_extractor, name="NER", inputs=["ESRetriever"])
    qa_pipeline.add_node(component=qa_reader, name="Reader", inputs=["NER"])

    result = qa_pipeline.run(
        query="Who lives in Berlin?", params={"ESRetriever": {"top_k": 1}, "Reader": {"top_k": 1}}
    )

    simplified_answers = simplify_ner_for_qa(result)
    assert simplified_answers[0] == {"answer": "Carla and I", "entities": ["Carla"]}
 |