refactor: Move InMemoryDocumentStore tests to their own class (#3614)

* move tests to their own class
* move more tests
* add specific job
* fix test
* Update test/document_stores/test_memory.py

Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
2025-10-30 01:09:43 +00:00 · 2022-11-23 15:33:46 +01:00 · 2022-11-23 15:33:46 +01:00 · a15af7f8c3
commit a15af7f8c3
parent 0e05f71f33
3 changed files with 121 additions and 38 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -428,6 +428,34 @@ jobs:
        channel: '#haystack'
      if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

+  integration-tests-memory:
+    name: Integration / memory / ${{ matrix.os }}
+    needs:
+     - unit-tests
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest,macos-latest,windows-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: ./.github/actions/python_cache/
+
+      - name: Install Haystack
+        run: pip install -U .
+
+      - name: Run tests
+        run: |
+          pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_memory.py
+
+      - uses: act10ns/slack@v1
+        with:
+          status: ${{ job.status }}
+          channel: '#haystack'
+        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
+

 #
 # TODO: the following steps need to be revisited
--- a/test/document_stores/test_document_store.py
+++ b/test/document_stores/test_document_store.py
@@ -1436,41 +1436,3 @@ def test_cosine_sanity_check(document_store_small):

    # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
    assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.0002)
-
-
-def test_normalize_embeddings_diff_shapes():
-    VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32")
-    BaseDocumentStore.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
-    VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
-    BaseDocumentStore.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
-
-def test_memory_update_bm25():
-    ds = InMemoryDocumentStore(use_bm25=False)
-    ds.write_documents(DOCUMENTS)
-    ds.update_bm25()
-    bm25_representation = ds.bm25[ds.index]
-    assert isinstance(bm25_representation, BM25)
-    assert bm25_representation.corpus_size == ds.get_document_count()
-
-
-@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
-def test_memory_query(document_store_with_docs):
-    query_text = "Rome"
-    docs = document_store_with_docs.query(query=query_text, top_k=1)
-    assert len(docs) == 1
-    assert docs[0].content == "My name is Matteo and I live in Rome"
-
-
-@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
-def test_memory_query_batch(document_store_with_docs):
-    query_texts = ["Paris", "Madrid"]
-    docs = document_store_with_docs.query_batch(queries=query_texts, top_k=5)
-    assert len(docs) == 2
-    assert len(docs[0]) == 5
-    assert docs[0][0].content == "My name is Christelle and I live in Paris"
-    assert len(docs[1]) == 5
-    assert docs[1][0].content == "My name is Camila and I live in Madrid"
--- a/test/document_stores/test_memory.py
+++ b/test/document_stores/test_memory.py
@@ -0,0 +1,93 @@
+import logging
+
+import pytest
+from rank_bm25 import BM25
+
+from haystack.document_stores.memory import InMemoryDocumentStore
+from haystack.schema import Document
+
+from .test_base import DocumentStoreBaseTestAbstract
+
+
+class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract):
+    @pytest.fixture
+    def ds(self):
+        return InMemoryDocumentStore(return_embedding=True, use_bm25=True)
+
+    @pytest.mark.integration
+    def test_delete_index(self, ds, documents):
+        """Contrary to other Document Stores, this doesn't raise if the index is empty"""
+        ds.write_documents(documents, index="custom_index")
+        assert ds.get_document_count(index="custom_index") == len(documents)
+        ds.delete_index(index="custom_index")
+        assert ds.get_document_count(index="custom_index") == 0
+
+    @pytest.mark.integration
+    def test_ne_filters(self, ds, documents):
+        """
+        InMemory doesn't include documents if the field is missing,
+        so we customize this test
+        """
+        ds.write_documents(documents)
+
+        result = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
+        assert len(result) == 3
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nin_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_comparison_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nested_condition_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nested_condition_not_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.integration
+    def test_get_documents_by_id(self, ds, documents):
+        """
+        The base test uses the batch_size param that's not supported
+        here, so we override the test case
+        """
+        ds.write_documents(documents)
+        ids = [doc.id for doc in documents]
+        result = {doc.id for doc in ds.get_documents_by_id(ids)}
+        assert set(ids) == result
+
+    @pytest.mark.integration
+    def test_update_bm25(self, documents):
+        ds = InMemoryDocumentStore(use_bm25=False)
+        ds.write_documents(documents)
+        ds.update_bm25()
+        bm25_representation = ds.bm25[ds.index]
+        assert isinstance(bm25_representation, BM25)
+        assert bm25_representation.corpus_size == ds.get_document_count()
+
+    @pytest.mark.integration
+    def test_memory_query(self, ds, documents):
+        ds.write_documents(documents)
+        query_text = "Bar"
+        docs = ds.query(query=query_text, top_k=1)
+        assert len(docs) == 1
+        assert "A Bar Document" in docs[0].content
+
+    @pytest.mark.integration
+    def test_memory_query_batch(self, ds, documents):
+        ds.write_documents(documents)
+        query_texts = ["Foo", "Bar"]
+        docs = ds.query_batch(queries=query_texts, top_k=5)
+        assert len(docs) == 2
+        assert len(docs[0]) == 5
+        assert "A Foo Document" in docs[0][0].content
+        assert len(docs[1]) == 5
+        assert "A Bar Document" in docs[1][0].content