diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 1cf535f02..0fdc4f7df 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -313,7 +313,7 @@ jobs:
       env:
         TOKENIZERS_PARALLELISM: 'false'
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=faiss
+        pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" test/document_stores/ --document_store_type=faiss
 
     - uses: act10ns/slack@v1
       with:
@@ -348,7 +348,7 @@ jobs:
       env:
         TOKENIZERS_PARALLELISM: 'false'
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
+        pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
 
     - uses: act10ns/slack@v1
       with:
@@ -386,7 +386,7 @@ jobs:
       env:
         TOKENIZERS_PARALLELISM: 'false'
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=milvus
+        pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" test/document_stores/ --document_store_type=milvus
 
     - name: Dump docker logs on failure
       if: failure()
@@ -434,7 +434,7 @@ jobs:
 #      env:
 #        TOKENIZERS_PARALLELISM: 'false'
 #      run: |
-#        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
+#        pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
 
 
   weaviate-tests-linux:
@@ -464,7 +464,7 @@ jobs:
       env:
         TOKENIZERS_PARALLELISM: 'false'
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
+        pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" test/document_stores/ --document_store_type=weaviate
 
     - name: Dump docker logs on failure
       if: failure()
@@ -508,7 +508,7 @@ jobs:
 #      env:
 #        TOKENIZERS_PARALLELISM: 'false'
 #      run: |
-#        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
+#        pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
 
 
   pinecone-tests-linux:
@@ -536,7 +536,7 @@ jobs:
         PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
         TOKENIZERS_PARALLELISM: 'false'
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
+        pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" test/document_stores/ --document_store_type=pinecone
 
     - uses: act10ns/slack@v1
       with:
@@ -573,7 +573,7 @@ jobs:
         TOKENIZERS_PARALLELISM: 'false'
         PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
       run: |
-        pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
+        pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
 
     - uses: act10ns/slack@v1
       with:
diff --git a/conftest.py b/conftest.py
index a381d802f..b0ea11b4d 100644
--- a/conftest.py
+++ b/conftest.py
@@ -2,7 +2,7 @@ def pytest_addoption(parser):
     parser.addoption(
         "--document_store_type",
         action="store",
-        default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
+        default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone, opensearch",
     )
diff --git a/test/conftest.py b/test/conftest.py
index e28adc39b..b01f6e3e6 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -11,6 +11,7 @@ import uuid
 import logging
 from pathlib import Path
 import os
+import re
 
 import requests_cache
 import responses
@@ -169,32 +170,63 @@ def pytest_collection_modifyitems(config, items):
                 keywords.extend(i.split("-"))
             else:
                 keywords.append(i)
-        for cur_doc_store in [
-            "elasticsearch",
-            "faiss",
-            "sql",
-            "memory",
-            "milvus1",
-            "milvus",
-            "weaviate",
-            "pinecone",
-            "opensearch",
-        ]:
-            if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
-                skip_docstore = pytest.mark.skip(
-                    reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
-                )
-                item.add_marker(skip_docstore)
-        if "milvus1" in keywords and not milvus1:
+        required_doc_store = infer_required_doc_store(item, keywords)
+
+        if required_doc_store and required_doc_store not in document_store_types_to_run:
+            skip_docstore = pytest.mark.skip(
+                reason=f'{required_doc_store} is disabled. Enable via pytest --document_store_type="{required_doc_store}"'
+            )
+            item.add_marker(skip_docstore)
+
+        if "milvus1" == required_doc_store and not milvus1:
             skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
             item.add_marker(skip_milvus1)
-        elif "milvus" in keywords and milvus1:
+        elif "milvus" == required_doc_store and milvus1:
             skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
             item.add_marker(skip_milvus)
 
 
+def infer_required_doc_store(item, keywords):
+    # assumption: a test runs only with one document_store
+    # if there are multiple docstore markers, we apply the following heuristics:
+    # 1. if the test was parameterized, we use the parameter
+    # 2. if the test name contains the docstore name, we use that
+    # 3. use an arbitrary one by calling set.pop()
+    required_doc_store = None
+    all_doc_stores = {
+        "elasticsearch",
+        "faiss",
+        "sql",
+        "memory",
+        "milvus1",
+        "milvus",
+        "weaviate",
+        "pinecone",
+        "opensearch",
+    }
+    docstore_markers = set(keywords).intersection(all_doc_stores)
+    if len(docstore_markers) > 1:
+        # if parameterized, infer the docstore from the parameter
+        if hasattr(item, "callspec"):
+            for doc_store in all_doc_stores:
+                # callspec.id contains the parameter values of the test
+                if re.search(f"(^|-){doc_store}($|[-_])", item.callspec.id):
+                    required_doc_store = doc_store
+                    break
+        # if still not found, infer the docstore from the test name
+        if required_doc_store is None:
+            for doc_store in all_doc_stores:
+                if doc_store in item.name:
+                    required_doc_store = doc_store
+                    break
+    # if still not found or there is only one, use an arbitrary one from the markers
+    if required_doc_store is None:
+        required_doc_store = docstore_markers.pop() if docstore_markers else None
+    return required_doc_store
+
+
 #
 # Empty mocks, as a base for unit tests.
 #
diff --git a/test/document_stores/test_faiss_and_milvus.py b/test/document_stores/test_faiss_and_milvus.py
index cf0bc4a78..7ce675151 100644
--- a/test/document_stores/test_faiss_and_milvus.py
+++ b/test/document_stores/test_faiss_and_milvus.py
@@ -518,7 +518,7 @@ def test_cosine_similarity(document_store):
     # now check if vectors are normalized when updating embeddings
     class MockRetriever:
         def embed_documents(self, docs):
-            return [np.random.rand(768).astype(np.float32) for doc in docs]
+            return np.random.rand(len(docs), 768).astype(np.float32)
 
     retriever = MockRetriever()
     document_store.update_embeddings(retriever=retriever)
diff --git a/test/nodes/test_retriever.py b/test/nodes/test_retriever.py
index c5081b1e5..ffff02276 100644
--- a/test/nodes/test_retriever.py
+++ b/test/nodes/test_retriever.py
@@ -193,7 +193,7 @@ def test_dpr_embedding(document_store: BaseDocumentStore, retriever, docs_with_i
     # always normalize vector as faiss returns normalized vectors and other document stores do not
     embedding /= np.linalg.norm(embedding)
     assert len(embedding) == 768
-    assert isclose(embedding[0], expected_value, rel_tol=0.001)
+    assert isclose(embedding[0], expected_value, rel_tol=0.01)
 
 
 @pytest.mark.integration
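Note on the new marker handling: below is a minimal, illustrative sketch of how the heuristics added to `infer_required_doc_store` in `test/conftest.py` resolve a single document store when a test carries more than one docstore marker (parameter id first, then test name, then an arbitrary marker). The `resolve_doc_store` helper and the example inputs are hypothetical and not part of this PR; they only mirror the logic shown in the diff above.

```python
import re

# same set of docstore markers as in test/conftest.py
ALL_DOC_STORES = {
    "elasticsearch", "faiss", "sql", "memory",
    "milvus1", "milvus", "weaviate", "pinecone", "opensearch",
}


def resolve_doc_store(markers, callspec_id=None, test_name=""):
    """Illustrative mirror of the conftest heuristics: parameter id, then test name, then any marker."""
    candidates = set(markers) & ALL_DOC_STORES
    if len(candidates) > 1:
        # 1. a parametrized test encodes its parameter values in the callspec id, e.g. "faiss-retriever_dpr"
        if callspec_id:
            for store in ALL_DOC_STORES:
                if re.search(f"(^|-){store}($|[-_])", callspec_id):
                    return store
        # 2. otherwise fall back to the test name
        for store in ALL_DOC_STORES:
            if store in test_name:
                return store
    # 3. a single marker (or no match above): take an arbitrary one
    return candidates.pop() if candidates else None


# A test marked with both the "faiss" and "milvus" markers, parametrized with a FAISS store:
print(resolve_doc_store({"faiss", "milvus"}, callspec_id="faiss-retriever_dpr"))  # -> "faiss"
# No parametrization, so the test name decides:
print(resolve_doc_store({"faiss", "milvus"}, test_name="test_milvus_only"))  # -> "milvus"
```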