mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	 c802305ccf
			
		
	
	
		c802305ccf
		
			
		
	
	
	
	
		
			
			* move out standard pipelines e2e * fixing unit tests * add test data * feedback * pylint * black
		
			
				
	
	
		
			115 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			115 lines
		
	
	
		
			5.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from haystack.document_stores import InMemoryDocumentStore
 | |
| from haystack.pipelines import FAQPipeline, DocumentSearchPipeline, MostSimilarDocumentsPipeline
 | |
| from haystack.nodes import EmbeddingRetriever
 | |
| from haystack.schema import Document
 | |
| 
 | |
| 
 | |
| def test_faq_pipeline_batch():
 | |
|     documents = [
 | |
|         {"content": "How to test module-1?", "meta": {"source": "wiki1", "answer": "Using tests for module-1"}},
 | |
|         {"content": "How to test module-2?", "meta": {"source": "wiki2", "answer": "Using tests for module-2"}},
 | |
|         {"content": "How to test module-3?", "meta": {"source": "wiki3", "answer": "Using tests for module-3"}},
 | |
|         {"content": "How to test module-4?", "meta": {"source": "wiki4", "answer": "Using tests for module-4"}},
 | |
|         {"content": "How to test module-5?", "meta": {"source": "wiki5", "answer": "Using tests for module-5"}},
 | |
|     ]
 | |
|     document_store = InMemoryDocumentStore()
 | |
|     retriever = EmbeddingRetriever(
 | |
|         document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2"
 | |
|     )
 | |
|     document_store.write_documents(documents)
 | |
|     document_store.update_embeddings(retriever)
 | |
| 
 | |
|     pipeline = FAQPipeline(retriever=retriever)
 | |
| 
 | |
|     output = pipeline.run_batch(queries=["How to test this?", "How to test this?"], params={"Retriever": {"top_k": 3}})
 | |
|     assert len(output["answers"]) == 2  # 2 queries
 | |
|     assert len(output["answers"][0]) == 3  # 3 answers per query
 | |
|     assert output["queries"][0].startswith("How to")
 | |
|     assert output["answers"][0][0].answer.startswith("Using tests")
 | |
| 
 | |
| 
 | |
| def test_document_search_pipeline_batch():
 | |
|     documents = [
 | |
|         {"content": "Sample text for document-1", "meta": {"source": "wiki1"}},
 | |
|         {"content": "Sample text for document-2", "meta": {"source": "wiki2"}},
 | |
|         {"content": "Sample text for document-3", "meta": {"source": "wiki3"}},
 | |
|         {"content": "Sample text for document-4", "meta": {"source": "wiki4"}},
 | |
|         {"content": "Sample text for document-5", "meta": {"source": "wiki5"}},
 | |
|     ]
 | |
|     document_store = InMemoryDocumentStore()
 | |
|     retriever = EmbeddingRetriever(
 | |
|         document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2"
 | |
|     )
 | |
|     document_store.write_documents(documents)
 | |
|     document_store.update_embeddings(retriever)
 | |
| 
 | |
|     pipeline = DocumentSearchPipeline(retriever=retriever)
 | |
|     output = pipeline.run_batch(queries=["How to test this?", "How to test this?"], params={"top_k": 4})
 | |
|     assert len(output["documents"]) == 2  # 2 queries
 | |
|     assert len(output["documents"][0]) == 4  # 4 docs per query
 | |
| 
 | |
| 
 | |
| def test_most_similar_documents_pipeline_batch():
 | |
|     documents = [
 | |
|         {"id": "a", "content": "Sample text for document-1", "meta": {"source": "wiki1"}},
 | |
|         {"id": "b", "content": "Sample text for document-2", "meta": {"source": "wiki2"}},
 | |
|         {"content": "Sample text for document-3", "meta": {"source": "wiki3"}},
 | |
|         {"content": "Sample text for document-4", "meta": {"source": "wiki4"}},
 | |
|         {"content": "Sample text for document-5", "meta": {"source": "wiki5"}},
 | |
|     ]
 | |
|     document_store = InMemoryDocumentStore()
 | |
|     retriever = EmbeddingRetriever(
 | |
|         document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2"
 | |
|     )
 | |
|     document_store.write_documents(documents)
 | |
|     document_store.update_embeddings(retriever)
 | |
| 
 | |
|     docs_id: list = ["a", "b"]
 | |
|     pipeline = MostSimilarDocumentsPipeline(document_store=document_store)
 | |
|     list_of_documents = pipeline.run_batch(document_ids=docs_id)
 | |
| 
 | |
|     assert len(list_of_documents[0]) > 1
 | |
|     assert isinstance(list_of_documents, list)
 | |
|     assert len(list_of_documents) == len(docs_id)
 | |
| 
 | |
|     for another_list in list_of_documents:
 | |
|         assert isinstance(another_list, list)
 | |
|         for document in another_list:
 | |
|             assert isinstance(document, Document)
 | |
|             assert isinstance(document.id, str)
 | |
|             assert isinstance(document.content, str)
 | |
| 
 | |
| 
 | |
| def test_most_similar_documents_pipeline_with_filters_batch():
 | |
|     documents = [
 | |
|         {"id": "a", "content": "Sample text for document-1", "meta": {"source": "wiki1"}},
 | |
|         {"id": "b", "content": "Sample text for document-2", "meta": {"source": "wiki2"}},
 | |
|         {"content": "Sample text for document-3", "meta": {"source": "wiki3"}},
 | |
|         {"content": "Sample text for document-4", "meta": {"source": "wiki4"}},
 | |
|         {"content": "Sample text for document-5", "meta": {"source": "wiki5"}},
 | |
|     ]
 | |
|     document_store = InMemoryDocumentStore()
 | |
|     retriever = EmbeddingRetriever(
 | |
|         document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2"
 | |
|     )
 | |
|     document_store = InMemoryDocumentStore()
 | |
|     document_store.write_documents(documents)
 | |
|     document_store.update_embeddings(retriever)
 | |
| 
 | |
|     docs_id: list = ["a", "b"]
 | |
|     filters = {"source": ["wiki3", "wiki4", "wiki5"]}
 | |
|     pipeline = MostSimilarDocumentsPipeline(document_store=document_store)
 | |
|     list_of_documents = pipeline.run_batch(document_ids=docs_id, filters=filters)
 | |
| 
 | |
|     assert len(list_of_documents[0]) > 1
 | |
|     assert isinstance(list_of_documents, list)
 | |
|     assert len(list_of_documents) == len(docs_id)
 | |
| 
 | |
|     for another_list in list_of_documents:
 | |
|         assert isinstance(another_list, list)
 | |
|         for document in another_list:
 | |
|             assert isinstance(document, Document)
 | |
|             assert isinstance(document.id, str)
 | |
|             assert isinstance(document.content, str)
 | |
|             assert document.meta["source"] in ["wiki3", "wiki4", "wiki5"]
 |