Fix milvus and faiss tests not running (#3263)

* fix milvus and faiss tests not running

* fix schema manually

* fix test_dpr_embedding test for milvus

* pip freeze on milvus tests

* fix milvus1 tests being executed: fix all_doc_stores order

* Revert "pip freeze on milvus tests"

This reverts commit 75ebb6f7e507bb8477e87d9e63b4a294f7946cab.

* make infer_required_doc_store more robust

* don't skip tests without docstore requirements

* use markers for docstore tests
This commit is contained in:
tstadel 2022-09-22 17:46:49 +02:00 committed by GitHub
parent 2b803a265b
commit 4fa9d2d8e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 29 deletions

View File

@ -313,7 +313,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=faiss
pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" test/document_stores/ --document_store_type=faiss
- uses: act10ns/slack@v1
with:
@ -348,7 +348,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
- uses: act10ns/slack@v1
with:
@ -386,7 +386,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=milvus
pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" test/document_stores/ --document_store_type=milvus
- name: Dump docker logs on failure
if: failure()
@ -434,7 +434,7 @@ jobs:
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
# pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
weaviate-tests-linux:
@ -464,7 +464,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" test/document_stores/ --document_store_type=weaviate
- name: Dump docker logs on failure
if: failure()
@ -508,7 +508,7 @@ jobs:
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
# pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
pinecone-tests-linux:
@ -536,7 +536,7 @@ jobs:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" test/document_stores/ --document_store_type=pinecone
- uses: act10ns/slack@v1
with:
@ -573,7 +573,7 @@ jobs:
TOKENIZERS_PARALLELISM: 'false'
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
- uses: act10ns/slack@v1
with:

View File

@ -2,7 +2,7 @@ def pytest_addoption(parser):
parser.addoption(
"--document_store_type",
action="store",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone, opensearch",
)

View File

@ -11,6 +11,7 @@ import uuid
import logging
from pathlib import Path
import os
import re
import requests_cache
import responses
@ -169,32 +170,63 @@ def pytest_collection_modifyitems(config, items):
keywords.extend(i.split("-"))
else:
keywords.append(i)
for cur_doc_store in [
"elasticsearch",
"faiss",
"sql",
"memory",
"milvus1",
"milvus",
"weaviate",
"pinecone",
"opensearch",
]:
if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
)
item.add_marker(skip_docstore)
if "milvus1" in keywords and not milvus1:
required_doc_store = infer_required_doc_store(item, keywords)
if required_doc_store and required_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{required_doc_store} is disabled. Enable via pytest --document_store_type="{required_doc_store}"'
)
item.add_marker(skip_docstore)
if "milvus1" == required_doc_store and not milvus1:
skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
item.add_marker(skip_milvus1)
elif "milvus" in keywords and milvus1:
elif "milvus" == required_doc_store and milvus1:
skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
item.add_marker(skip_milvus)
def infer_required_doc_store(item, keywords):
    """Infer which single document store a collected test item requires.

    Assumption: a test runs with exactly one document store. If multiple
    docstore markers are present, the following heuristics are applied in
    order:
      1. if the test was parameterized, use the parameter (callspec id)
      2. if the test name contains the docstore name, use that
      3. otherwise use an arbitrary marker via set.pop()

    :param item: the collected pytest item (provides ``name`` and,
        for parameterized tests, ``callspec``).
    :param keywords: iterable of keywords/markers attached to the item.
    :return: the inferred docstore name, or None if no docstore marker is set.
    """
    required_doc_store = None
    all_doc_stores = {
        "elasticsearch",
        "faiss",
        "sql",
        "memory",
        "milvus1",
        "milvus",
        "weaviate",
        "pinecone",
        "opensearch",
    }
    docstore_markers = set(keywords).intersection(all_doc_stores)
    # Check longer names first so that e.g. "milvus1" is matched before its
    # substring "milvus" — a plain set iteration order is arbitrary and could
    # otherwise resolve a milvus1 test to "milvus".
    candidates = sorted(all_doc_stores, key=len, reverse=True)
    if len(docstore_markers) > 1:
        # if parameterized, infer the docstore from the parameter
        if hasattr(item, "callspec"):
            for doc_store in candidates:
                # callspec.id contains the parameter values of the test;
                # the boundary pattern avoids matching "milvus" inside "milvus1"
                if re.search(f"(^|-){doc_store}($|[-_])", item.callspec.id):
                    required_doc_store = doc_store
                    break
        # if still not found, infer the docstore from the test name
        if required_doc_store is None:
            for doc_store in candidates:
                if doc_store in item.name:
                    required_doc_store = doc_store
                    break
    # if still not found, or there is only one marker, use an arbitrary one
    if required_doc_store is None:
        required_doc_store = docstore_markers.pop() if docstore_markers else None
    return required_doc_store
#
# Empty mocks, as a base for unit tests.
#

View File

@ -518,7 +518,7 @@ def test_cosine_similarity(document_store):
# now check if vectors are normalized when updating embeddings
class MockRetriever:
def embed_documents(self, docs):
return [np.random.rand(768).astype(np.float32) for doc in docs]
return np.random.rand(len(docs), 768).astype(np.float32)
retriever = MockRetriever()
document_store.update_embeddings(retriever=retriever)

View File

@ -193,7 +193,7 @@ def test_dpr_embedding(document_store: BaseDocumentStore, retriever, docs_with_i
# always normalize vector as faiss returns normalized vectors and other document stores do not
embedding /= np.linalg.norm(embedding)
assert len(embedding) == 768
assert isclose(embedding[0], expected_value, rel_tol=0.001)
assert isclose(embedding[0], expected_value, rel_tol=0.01)
@pytest.mark.integration