diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f465c2feb..79712e486 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -428,6 +428,34 @@ jobs: channel: '#haystack' if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main' + integration-tests-memory: + name: Integration / memory / ${{ matrix.os }} + needs: + - unit-tests + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest,macos-latest,windows-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + + - name: Setup Python + uses: ./.github/actions/python_cache/ + + - name: Install Haystack + run: pip install -U . + + - name: Run tests + run: | + pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_memory.py + + - uses: act10ns/slack@v1 + with: + status: ${{ job.status }} + channel: '#haystack' + if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main' + # # TODO: the following steps need to be revisited diff --git a/test/document_stores/test_document_store.py b/test/document_stores/test_document_store.py index a4acbbb31..73cd22c02 100644 --- a/test/document_stores/test_document_store.py +++ b/test/document_stores/test_document_store.py @@ -1436,41 +1436,3 @@ def test_cosine_sanity_check(document_store_small): # check if faiss returns the same cosine similarity. 
import logging

import pytest
from rank_bm25 import BM25

from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.schema import Document

from .test_base import DocumentStoreBaseTestAbstract


class TestInMemoryDocumentStore(DocumentStoreBaseTestAbstract):
    """
    Integration tests for InMemoryDocumentStore.

    The class extends DocumentStoreBaseTestAbstract and supplies the `ds`
    fixture. The `documents` fixture is not defined here.
    NOTE(review): the methods below that mention "the base test" presumably
    override same-named tests inherited from the base class -- the base
    class is not visible from this file, confirm against test_base.
    """

    @pytest.fixture
    def ds(self):
        """Build a store with `return_embedding=True` and `use_bm25=True`."""
        store = InMemoryDocumentStore(return_embedding=True, use_bm25=True)
        return store

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """
        Write to a custom index, delete it, and expect a document count of 0.

        Contrary to other Document Stores, this one doesn't raise if the
        index is empty.
        """
        index_name = "custom_index"

        ds.write_documents(documents, index=index_name)
        assert ds.get_document_count(index=index_name) == len(documents)

        ds.delete_index(index=index_name)
        assert ds.get_document_count(index=index_name) == 0

    @pytest.mark.integration
    def test_ne_filters(self, ds, documents):
        """
        Check the `$ne` filter on the `year` field.

        InMemory doesn't include documents if the field is missing, so the
        expected number of matches is customized for this store.
        """
        ds.write_documents(documents)

        year_is_not_2020 = {"year": {"$ne": "2020"}}
        matches = ds.get_all_documents(filters=year_is_not_2020)
        assert len(matches) == 3

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        """
        Skipped unconditionally for this store.

        NOTE(review): no skip reason is recorded -- confirm why and add one.
        """

    @pytest.mark.skip
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        """
        Skipped unconditionally for this store.

        NOTE(review): no skip reason is recorded -- confirm why and add one.
        """

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        """
        Skipped unconditionally for this store.

        NOTE(review): no skip reason is recorded -- confirm why and add one.
        """

    @pytest.mark.skip
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        """
        Skipped unconditionally for this store.

        NOTE(review): no skip reason is recorded -- confirm why and add one.
        """

    @pytest.mark.integration
    def test_get_documents_by_id(self, ds, documents):
        """
        Fetch every written document by id and compare the id sets.

        The base test uses the batch_size param, which is not supported
        here, so the test case is overridden.
        """
        ds.write_documents(documents)

        expected_ids = [doc.id for doc in documents]
        fetched_ids = {doc.id for doc in ds.get_documents_by_id(expected_ids)}
        assert set(expected_ids) == fetched_ids

    @pytest.mark.integration
    def test_update_bm25(self, documents):
        """
        Call `update_bm25()` on a store created with `use_bm25=False`.

        Afterwards the entry stored under the store's index must be a BM25
        instance whose corpus size equals the store's document count.
        """
        # A dedicated store is created here instead of using the `ds` fixture,
        # because the fixture enables BM25 at construction time.
        store = InMemoryDocumentStore(use_bm25=False)
        store.write_documents(documents)
        store.update_bm25()

        index_bm25 = store.bm25[store.index]
        assert isinstance(index_bm25, BM25)
        assert index_bm25.corpus_size == store.get_document_count()

    @pytest.mark.integration
    def test_memory_query(self, ds, documents):
        """Query for "Bar" with `top_k=1` and expect a single "A Bar Document" hit."""
        ds.write_documents(documents)

        hits = ds.query(query="Bar", top_k=1)
        assert len(hits) == 1
        assert "A Bar Document" in hits[0].content

    @pytest.mark.integration
    def test_memory_query_batch(self, ds, documents):
        """
        Run a two-query batch and check each query's result list.

        Each query must yield 5 results, and the first result must contain
        the matching "A Foo Document" / "A Bar Document" text.
        """
        ds.write_documents(documents)

        per_query_hits = ds.query_batch(queries=["Foo", "Bar"], top_k=5)
        assert len(per_query_hits) == 2

        foo_hits, bar_hits = per_query_hits
        assert len(foo_hits) == 5
        assert "A Foo Document" in foo_hits[0].content
        assert len(bar_hits) == 5
        assert "A Bar Document" in bar_hits[0].content