mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-26 07:19:13 +00:00 
			
		
		
		
	refactor: Move InMemoryDocumentStore tests to their own class (#3614)
				
					
				
			* move tests to their own class
			* move more tests
			* add specific job
			* fix test
			* Update test/document_stores/test_memory.py

			Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
			Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
		
							parent
							
								
									0e05f71f33
								
							
						
					
					
						commit
						a15af7f8c3
					
				
							
								
								
									
										28
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										28
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							| @ -428,6 +428,34 @@ jobs: | |||||||
|         channel: '#haystack' |         channel: '#haystack' | ||||||
|       if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main' |       if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main' | ||||||
| 
 | 
 | ||||||
|  |   integration-tests-memory: | ||||||
|  |     name: Integration / memory / ${{ matrix.os }} | ||||||
|  |     needs: | ||||||
|  |      - unit-tests | ||||||
|  |     strategy: | ||||||
|  |       fail-fast: false | ||||||
|  |       matrix: | ||||||
|  |         os: [ubuntu-latest,macos-latest,windows-latest] | ||||||
|  |     runs-on: ${{ matrix.os }} | ||||||
|  |     steps: | ||||||
|  |       - uses: actions/checkout@v3 | ||||||
|  | 
 | ||||||
|  |       - name: Setup Python | ||||||
|  |         uses: ./.github/actions/python_cache/ | ||||||
|  | 
 | ||||||
|  |       - name: Install Haystack | ||||||
|  |         run: pip install -U . | ||||||
|  | 
 | ||||||
|  |       - name: Run tests | ||||||
|  |         run: | | ||||||
|  |           pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_memory.py | ||||||
|  | 
 | ||||||
|  |       - uses: act10ns/slack@v1 | ||||||
|  |         with: | ||||||
|  |           status: ${{ job.status }} | ||||||
|  |           channel: '#haystack' | ||||||
|  |         if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main' | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # | # | ||||||
| # TODO: the following steps need to be revisited | # TODO: the following steps need to be revisited | ||||||
|  | |||||||
| @ -1436,41 +1436,3 @@ def test_cosine_sanity_check(document_store_small): | |||||||
| 
 | 
 | ||||||
|     # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318 |     # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318 | ||||||
|     assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002) |     assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002) | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def test_normalize_embeddings_diff_shapes(): |  | ||||||
|     VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32") |  | ||||||
|     BaseDocumentStore.normalize_embedding(VEC_1) |  | ||||||
|     assert np.linalg.norm(VEC_1) - 1 < 0.01 |  | ||||||
| 
 |  | ||||||
|     VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1) |  | ||||||
|     BaseDocumentStore.normalize_embedding(VEC_1) |  | ||||||
|     assert np.linalg.norm(VEC_1) - 1 < 0.01 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def test_memory_update_bm25(): |  | ||||||
|     ds = InMemoryDocumentStore(use_bm25=False) |  | ||||||
|     ds.write_documents(DOCUMENTS) |  | ||||||
|     ds.update_bm25() |  | ||||||
|     bm25_representation = ds.bm25[ds.index] |  | ||||||
|     assert isinstance(bm25_representation, BM25) |  | ||||||
|     assert bm25_representation.corpus_size == ds.get_document_count() |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True) |  | ||||||
| def test_memory_query(document_store_with_docs): |  | ||||||
|     query_text = "Rome" |  | ||||||
|     docs = document_store_with_docs.query(query=query_text, top_k=1) |  | ||||||
|     assert len(docs) == 1 |  | ||||||
|     assert docs[0].content == "My name is Matteo and I live in Rome" |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True) |  | ||||||
| def test_memory_query_batch(document_store_with_docs): |  | ||||||
|     query_texts = ["Paris", "Madrid"] |  | ||||||
|     docs = document_store_with_docs.query_batch(queries=query_texts, top_k=5) |  | ||||||
|     assert len(docs) == 2 |  | ||||||
|     assert len(docs[0]) == 5 |  | ||||||
|     assert docs[0][0].content == "My name is Christelle and I live in Paris" |  | ||||||
|     assert len(docs[1]) == 5 |  | ||||||
|     assert docs[1][0].content == "My name is Camila and I live in Madrid" |  | ||||||
|  | |||||||
							
								
								
									
										93
									
								
								test/document_stores/test_memory.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								test/document_stores/test_memory.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,93 @@ | |||||||
|  | import logging | ||||||
|  | 
 | ||||||
|  | import pytest | ||||||
|  | from rank_bm25 import BM25 | ||||||
|  | 
 | ||||||
|  | from haystack.document_stores.memory import InMemoryDocumentStore | ||||||
|  | from haystack.schema import Document | ||||||
|  | 
 | ||||||
|  | from .test_base import DocumentStoreBaseTestAbstract | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract): | ||||||
|  |     @pytest.fixture | ||||||
|  |     def ds(self): | ||||||
|  |         return InMemoryDocumentStore(return_embedding=True, use_bm25=True) | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_delete_index(self, ds, documents): | ||||||
|  |         """Contrary to other Document Stores, this doesn't raise if the index is empty""" | ||||||
|  |         ds.write_documents(documents, index="custom_index") | ||||||
|  |         assert ds.get_document_count(index="custom_index") == len(documents) | ||||||
|  |         ds.delete_index(index="custom_index") | ||||||
|  |         assert ds.get_document_count(index="custom_index") == 0 | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_ne_filters(self, ds, documents): | ||||||
|  |         """ | ||||||
|  |         InMemory doesn't include documents if the field is missing, | ||||||
|  |         so we customize this test | ||||||
|  |         """ | ||||||
|  |         ds.write_documents(documents) | ||||||
|  | 
 | ||||||
|  |         result = ds.get_all_documents(filters={"year": {"$ne": "2020"}}) | ||||||
|  |         assert len(result) == 3 | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.skip | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_nin_filters(self, ds, documents): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.skip | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_comparison_filters(self, ds, documents): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.skip | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_nested_condition_filters(self, ds, documents): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.skip | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_nested_condition_not_filters(self, ds, documents): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_get_documents_by_id(self, ds, documents): | ||||||
|  |         """ | ||||||
|  |         The base test uses the batch_size param that's not supported | ||||||
|  |         here, so we override the test case | ||||||
|  |         """ | ||||||
|  |         ds.write_documents(documents) | ||||||
|  |         ids = [doc.id for doc in documents] | ||||||
|  |         result = {doc.id for doc in ds.get_documents_by_id(ids)} | ||||||
|  |         assert set(ids) == result | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_update_bm25(self, documents): | ||||||
|  |         ds = InMemoryDocumentStore(use_bm25=False) | ||||||
|  |         ds.write_documents(documents) | ||||||
|  |         ds.update_bm25() | ||||||
|  |         bm25_representation = ds.bm25[ds.index] | ||||||
|  |         assert isinstance(bm25_representation, BM25) | ||||||
|  |         assert bm25_representation.corpus_size == ds.get_document_count() | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_memory_query(self, ds, documents): | ||||||
|  |         ds.write_documents(documents) | ||||||
|  |         query_text = "Bar" | ||||||
|  |         docs = ds.query(query=query_text, top_k=1) | ||||||
|  |         assert len(docs) == 1 | ||||||
|  |         assert "A Bar Document" in docs[0].content | ||||||
|  | 
 | ||||||
|  |     @pytest.mark.integration | ||||||
|  |     def test_memory_query_batch(self, ds, documents): | ||||||
|  |         ds.write_documents(documents) | ||||||
|  |         query_texts = ["Foo", "Bar"] | ||||||
|  |         docs = ds.query_batch(queries=query_texts, top_k=5) | ||||||
|  |         assert len(docs) == 2 | ||||||
|  |         assert len(docs[0]) == 5 | ||||||
|  |         assert "A Foo Document" in docs[0][0].content | ||||||
|  |         assert len(docs[1]) == 5 | ||||||
|  |         assert "A Bar Document" in docs[1][0].content | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Massimiliano Pippi
						Massimiliano Pippi