Fix milvus and faiss tests not running (#3263)

* fix milvus and faiss tests not running

* fix schema manually

* fix test_dpr_embedding test for milvus

* pip freeze on milvus tests

* fix milvus1 tests being executed: fix all_doc_stores order

* Revert "pip freeze on milvus tests"

This reverts commit 75ebb6f7e507bb8477e87d9e63b4a294f7946cab.

* make infer_required_doc_store more robust

* don't skip tests without docstore requirements

* use markers for docstore tests
This commit is contained in:
tstadel 2022-09-22 17:46:49 +02:00 committed by GitHub
parent 2b803a265b
commit 4fa9d2d8e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 61 additions and 29 deletions

View File

@ -313,7 +313,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=faiss
pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" test/document_stores/ --document_store_type=faiss
- uses: act10ns/slack@v1
with:
@ -348,7 +348,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
- uses: act10ns/slack@v1
with:
@ -386,7 +386,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=milvus
pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" test/document_stores/ --document_store_type=milvus
- name: Dump docker logs on failure
if: failure()
@ -434,7 +434,7 @@ jobs:
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
# pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus
weaviate-tests-linux:
@ -464,7 +464,7 @@ jobs:
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=weaviate
pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" test/document_stores/ --document_store_type=weaviate
- name: Dump docker logs on failure
if: failure()
@ -508,7 +508,7 @@ jobs:
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
# pytest ${{ env.PYTEST_PARAMS }} -m "weaviate and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=weaviate
pinecone-tests-linux:
@ -536,7 +536,7 @@ jobs:
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" test/document_stores/ --document_store_type=pinecone
pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" test/document_stores/ --document_store_type=pinecone
- uses: act10ns/slack@v1
with:
@ -573,7 +573,7 @@ jobs:
TOKENIZERS_PARALLELISM: 'false'
PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
pytest ${{ env.PYTEST_PARAMS }} -m "pinecone and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=pinecone
- uses: act10ns/slack@v1
with:

View File

@ -2,7 +2,7 @@ def pytest_addoption(parser):
parser.addoption(
"--document_store_type",
action="store",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone, opensearch",
)

View File

@ -11,6 +11,7 @@ import uuid
import logging
from pathlib import Path
import os
import re
import requests_cache
import responses
@ -169,32 +170,63 @@ def pytest_collection_modifyitems(config, items):
keywords.extend(i.split("-"))
else:
keywords.append(i)
for cur_doc_store in [
"elasticsearch",
"faiss",
"sql",
"memory",
"milvus1",
"milvus",
"weaviate",
"pinecone",
"opensearch",
]:
if keywords and cur_doc_store in keywords and cur_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{cur_doc_store} is disabled. Enable via pytest --document_store_type="{cur_doc_store}"'
)
item.add_marker(skip_docstore)
if "milvus1" in keywords and not milvus1:
required_doc_store = infer_required_doc_store(item, keywords)
if required_doc_store and required_doc_store not in document_store_types_to_run:
skip_docstore = pytest.mark.skip(
reason=f'{required_doc_store} is disabled. Enable via pytest --document_store_type="{required_doc_store}"'
)
item.add_marker(skip_docstore)
if "milvus1" == required_doc_store and not milvus1:
skip_milvus1 = pytest.mark.skip(reason="Skipping Tests for 'milvus1', as Milvus2 seems to be installed.")
item.add_marker(skip_milvus1)
elif "milvus" in keywords and milvus1:
elif "milvus" == required_doc_store and milvus1:
skip_milvus = pytest.mark.skip(reason="Skipping Tests for 'milvus', as Milvus1 seems to be installed.")
item.add_marker(skip_milvus)
def infer_required_doc_store(item, keywords):
    """Infer which single document store a collected test item requires.

    Assumption: a test runs with exactly one document store. If multiple
    docstore markers are present, the following heuristics are applied in
    order:
      1. if the test was parameterized, use the parameter (callspec id)
      2. if the test name contains the docstore name, use that
      3. otherwise use an arbitrary marker via set.pop()

    :param item: the collected pytest item (provides ``name`` and,
        for parameterized tests, ``callspec``).
    :param keywords: iterable of keywords/markers attached to the item.
    :return: the inferred docstore name, or None if no docstore marker is set.
    """
    required_doc_store = None
    all_doc_stores = {
        "elasticsearch",
        "faiss",
        "sql",
        "memory",
        "milvus1",
        "milvus",
        "weaviate",
        "pinecone",
        "opensearch",
    }
    docstore_markers = set(keywords).intersection(all_doc_stores)
    # Check longer names first so that e.g. "milvus1" is matched before its
    # substring "milvus" — a plain set iteration order is arbitrary and could
    # otherwise resolve a milvus1 test to "milvus".
    candidates = sorted(all_doc_stores, key=len, reverse=True)
    if len(docstore_markers) > 1:
        # if parameterized, infer the docstore from the parameter
        if hasattr(item, "callspec"):
            for doc_store in candidates:
                # callspec.id contains the parameter values of the test;
                # the boundary pattern avoids matching "milvus" inside "milvus1"
                if re.search(f"(^|-){doc_store}($|[-_])", item.callspec.id):
                    required_doc_store = doc_store
                    break
        # if still not found, infer the docstore from the test name
        if required_doc_store is None:
            for doc_store in candidates:
                if doc_store in item.name:
                    required_doc_store = doc_store
                    break
    # if still not found, or there is only one marker, use an arbitrary one
    if required_doc_store is None:
        required_doc_store = docstore_markers.pop() if docstore_markers else None
    return required_doc_store
#
# Empty mocks, as a base for unit tests.
#

View File

@ -518,7 +518,7 @@ def test_cosine_similarity(document_store):
# now check if vectors are normalized when updating embeddings
class MockRetriever:
def embed_documents(self, docs):
return [np.random.rand(768).astype(np.float32) for doc in docs]
return np.random.rand(len(docs), 768).astype(np.float32)
retriever = MockRetriever()
document_store.update_embeddings(retriever=retriever)

View File

@ -193,7 +193,7 @@ def test_dpr_embedding(document_store: BaseDocumentStore, retriever, docs_with_i
# always normalize vector as faiss returns normalized vectors and other document stores do not
embedding /= np.linalg.norm(embedding)
assert len(embedding) == 768
assert isclose(embedding[0], expected_value, rel_tol=0.001)
assert isclose(embedding[0], expected_value, rel_tol=0.01)
@pytest.mark.integration