mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-10-15 18:08:40 +00:00
refactor: move dC tests to their own module and job (#3529)
* move dC tests to their own module and job * restore global var * revert
This commit is contained in:
parent
815017ad5b
commit
255072d8d5
27
.github/workflows/tests.yml
vendored
27
.github/workflows/tests.yml
vendored
@ -224,6 +224,33 @@ jobs:
|
|||||||
channel: '#haystack'
|
channel: '#haystack'
|
||||||
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
||||||
|
|
||||||
|
integration-tests-dc:
|
||||||
|
name: Integration / dC / ${{ matrix.os }}
|
||||||
|
needs:
|
||||||
|
- unit-tests
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest,macos-latest,windows-latest]
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Setup Python
|
||||||
|
uses: ./.github/actions/python_cache/
|
||||||
|
|
||||||
|
- name: Install Haystack
|
||||||
|
run: pip install -U .
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
run: |
|
||||||
|
pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_deepsetcloud.py
|
||||||
|
|
||||||
|
- uses: act10ns/slack@v1
|
||||||
|
with:
|
||||||
|
status: ${{ job.status }}
|
||||||
|
channel: '#haystack'
|
||||||
|
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
|
||||||
#
|
#
|
||||||
# TODO: the following steps need to be revisited
|
# TODO: the following steps need to be revisited
|
||||||
#
|
#
|
||||||
|
@ -4,6 +4,9 @@ def pytest_addoption(parser):
|
|||||||
action="store",
|
action="store",
|
||||||
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
|
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
|
||||||
)
|
)
|
||||||
|
parser.addoption(
|
||||||
|
"--mock-dc", action="store_true", default=True, help="Mock HTTP requests to dC while running tests"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def pytest_generate_tests(metafunc):
|
def pytest_generate_tests(metafunc):
|
||||||
|
@ -26,7 +26,6 @@ import requests
|
|||||||
from haystack import Answer, BaseComponent
|
from haystack import Answer, BaseComponent
|
||||||
from haystack.document_stores import (
|
from haystack.document_stores import (
|
||||||
BaseDocumentStore,
|
BaseDocumentStore,
|
||||||
DeepsetCloudDocumentStore,
|
|
||||||
InMemoryDocumentStore,
|
InMemoryDocumentStore,
|
||||||
ElasticsearchDocumentStore,
|
ElasticsearchDocumentStore,
|
||||||
WeaviateDocumentStore,
|
WeaviateDocumentStore,
|
||||||
@ -86,11 +85,7 @@ from .mocks import pinecone as pinecone_mock
|
|||||||
|
|
||||||
# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
|
# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
|
||||||
SQL_TYPE = "sqlite"
|
SQL_TYPE = "sqlite"
|
||||||
# SQL_TYPE = "postgres"
|
|
||||||
|
|
||||||
SAMPLES_PATH = Path(__file__).parent / "samples"
|
SAMPLES_PATH = Path(__file__).parent / "samples"
|
||||||
|
|
||||||
# to run tests against Deepset Cloud set MOCK_DC to False and set the following params
|
|
||||||
DC_API_ENDPOINT = "https://DC_API/v1"
|
DC_API_ENDPOINT = "https://DC_API/v1"
|
||||||
DC_TEST_INDEX = "document_retrieval_1"
|
DC_TEST_INDEX = "document_retrieval_1"
|
||||||
DC_API_KEY = "NO_KEY"
|
DC_API_KEY = "NO_KEY"
|
||||||
@ -603,12 +598,6 @@ def deepset_cloud_fixture():
|
|||||||
responses.add_passthru(DC_API_ENDPOINT)
|
responses.add_passthru(DC_API_ENDPOINT)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
@responses.activate
|
|
||||||
def deepset_cloud_document_store(deepset_cloud_fixture):
|
|
||||||
return DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def rag_generator():
|
def rag_generator():
|
||||||
return RAGenerator(model_name_or_path="facebook/rag-token-nq", generator_type="token", max_length=20)
|
return RAGenerator(model_name_or_path="facebook/rag-token-nq", generator_type="token", max_length=20)
|
||||||
|
419
test/document_stores/test_deepsetcloud.py
Normal file
419
test/document_stores/test_deepsetcloud.py
Normal file
@ -0,0 +1,419 @@
|
|||||||
|
import logging
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import responses
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from responses import matchers
|
||||||
|
|
||||||
|
from haystack.document_stores import DeepsetCloudDocumentStore
|
||||||
|
from haystack.utils import DeepsetCloudError
|
||||||
|
from haystack.schema import Document, Label, Answer
|
||||||
|
|
||||||
|
|
||||||
|
DC_API_ENDPOINT = "https://dc.example.com/v1"
|
||||||
|
DC_TEST_INDEX = "document_retrieval_1"
|
||||||
|
DC_API_KEY = "NO_KEY"
|
||||||
|
SAMPLES_PATH = Path(__file__).parent.parent / "samples"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def dc_api_mock(request):
|
||||||
|
"""
|
||||||
|
This fixture contains responses activation, so either this one or ds() below must be
|
||||||
|
passed to tests that require mocking.
|
||||||
|
|
||||||
|
If `--mock-dc` was False, responses are never activated and it doesn't matter if the
|
||||||
|
fixture is passed or not.
|
||||||
|
"""
|
||||||
|
if request.config.getoption("--mock-dc"):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}",
|
||||||
|
match=[responses.matchers.header_matcher({"authorization": f"Bearer {DC_API_KEY}"})],
|
||||||
|
json={"indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 31}},
|
||||||
|
status=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
|
||||||
|
match=[responses.matchers.header_matcher({"authorization": f"Bearer {DC_API_KEY}"})],
|
||||||
|
json={
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"name": DC_TEST_INDEX,
|
||||||
|
"status": "DEPLOYED",
|
||||||
|
"indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 31},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"has_more": False,
|
||||||
|
"total": 1,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# activate the default mock, same as using the @responses.activate everywhere
|
||||||
|
with responses.mock as m:
|
||||||
|
yield m
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.document_store
|
||||||
|
@pytest.mark.integration
|
||||||
|
@pytest.mark.usefixtures("dc_api_mock")
|
||||||
|
class TestDeepsetCloudDocumentStore:
|
||||||
|
|
||||||
|
# Fixtures
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def ds(self):
|
||||||
|
"""
|
||||||
|
We make this fixture depend on `dc_api_mock` so that passing the document store will
|
||||||
|
activate the mocking and we spare one function parameter.
|
||||||
|
"""
|
||||||
|
return DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)
|
||||||
|
|
||||||
|
# Integration tests
|
||||||
|
|
||||||
|
def test_init_with_dot_product(self, ds):
|
||||||
|
assert ds.return_embedding == False
|
||||||
|
assert ds.similarity == "dot_product"
|
||||||
|
|
||||||
|
def test_init_with_cosine(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(
|
||||||
|
api_endpoint=DC_API_ENDPOINT,
|
||||||
|
api_key=DC_API_KEY,
|
||||||
|
index=DC_TEST_INDEX,
|
||||||
|
similarity="cosine",
|
||||||
|
return_embedding=True,
|
||||||
|
)
|
||||||
|
assert document_store.return_embedding == True
|
||||||
|
assert document_store.similarity == "cosine"
|
||||||
|
|
||||||
|
def test_invalid_token(self):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
|
||||||
|
match=[matchers.header_matcher({"authorization": "Bearer invalid_token"})],
|
||||||
|
body="Internal Server Error",
|
||||||
|
status=500,
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(
|
||||||
|
DeepsetCloudError,
|
||||||
|
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}/workspaces/default/pipelines failed: HTTP 500 - Internal Server Error",
|
||||||
|
):
|
||||||
|
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key="invalid_token", index=DC_TEST_INDEX)
|
||||||
|
|
||||||
|
def test_invalid_api_endpoint(self):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET, url=f"{DC_API_ENDPOINT}00/workspaces/default/pipelines", body="Not Found", status=404
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(
|
||||||
|
DeepsetCloudError,
|
||||||
|
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}00/workspaces/default/pipelines failed: "
|
||||||
|
f"HTTP 404 - Not Found\nNot Found",
|
||||||
|
):
|
||||||
|
DeepsetCloudDocumentStore(api_endpoint=f"{DC_API_ENDPOINT}00", api_key=DC_API_KEY, index=DC_TEST_INDEX)
|
||||||
|
|
||||||
|
def test_invalid_index(self, caplog):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/invalid_index",
|
||||||
|
body="Not Found",
|
||||||
|
status=404,
|
||||||
|
)
|
||||||
|
|
||||||
|
with caplog.at_level(logging.INFO):
|
||||||
|
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index="invalid_index")
|
||||||
|
assert (
|
||||||
|
"You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud."
|
||||||
|
in caplog.text
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_documents(self, ds):
|
||||||
|
with open(SAMPLES_PATH / "dc" / "documents-stream.response", "r") as f:
|
||||||
|
documents_stream_response = f.read()
|
||||||
|
docs = [json.loads(l) for l in documents_stream_response.splitlines()]
|
||||||
|
filtered_docs = [doc for doc in docs if doc["meta"]["file_id"] == docs[0]["meta"]["file_id"]]
|
||||||
|
documents_stream_filtered_response = "\n".join([json.dumps(d) for d in filtered_docs])
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.POST,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
|
||||||
|
body=documents_stream_response,
|
||||||
|
status=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.POST,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
|
||||||
|
match=[
|
||||||
|
matchers.json_params_matcher(
|
||||||
|
{"filters": {"file_id": [docs[0]["meta"]["file_id"]]}, "return_embedding": False}
|
||||||
|
)
|
||||||
|
],
|
||||||
|
body=documents_stream_filtered_response,
|
||||||
|
status=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
for doc in filtered_docs:
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents/{doc['id']}",
|
||||||
|
json=doc,
|
||||||
|
status=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
docs = ds.get_all_documents()
|
||||||
|
assert len(docs) > 1
|
||||||
|
assert isinstance(docs[0], Document)
|
||||||
|
|
||||||
|
first_doc = next(ds.get_all_documents_generator())
|
||||||
|
assert isinstance(first_doc, Document)
|
||||||
|
assert first_doc.meta["file_id"] is not None
|
||||||
|
|
||||||
|
filtered_docs = ds.get_all_documents(filters={"file_id": [first_doc.meta["file_id"]]})
|
||||||
|
assert len(filtered_docs) > 0
|
||||||
|
assert len(filtered_docs) < len(docs)
|
||||||
|
|
||||||
|
ids = [doc.id for doc in filtered_docs]
|
||||||
|
single_doc_by_id = ds.get_document_by_id(ids[0])
|
||||||
|
assert single_doc_by_id is not None
|
||||||
|
assert single_doc_by_id.meta["file_id"] == first_doc.meta["file_id"]
|
||||||
|
|
||||||
|
docs_by_id = ds.get_documents_by_id(ids)
|
||||||
|
assert len(docs_by_id) == len(filtered_docs)
|
||||||
|
for doc in docs_by_id:
|
||||||
|
assert doc.meta["file_id"] == first_doc.meta["file_id"]
|
||||||
|
|
||||||
|
def test_query(self, ds):
|
||||||
|
|
||||||
|
with open(SAMPLES_PATH / "dc" / "query_winterfell.response", "r") as f:
|
||||||
|
query_winterfell_response = f.read()
|
||||||
|
query_winterfell_docs = json.loads(query_winterfell_response)
|
||||||
|
query_winterfell_filtered_docs = [
|
||||||
|
doc
|
||||||
|
for doc in query_winterfell_docs
|
||||||
|
if doc["meta"]["file_id"] == query_winterfell_docs[0]["meta"]["file_id"]
|
||||||
|
]
|
||||||
|
query_winterfell_filtered_response = json.dumps(query_winterfell_filtered_docs)
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.POST,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
||||||
|
match=[
|
||||||
|
matchers.json_params_matcher(
|
||||||
|
{"query": "winterfell", "top_k": 50, "all_terms_must_match": False, "scale_score": True}
|
||||||
|
)
|
||||||
|
],
|
||||||
|
status=200,
|
||||||
|
body=query_winterfell_response,
|
||||||
|
)
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.POST,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
||||||
|
match=[
|
||||||
|
matchers.json_params_matcher(
|
||||||
|
{
|
||||||
|
"query": "winterfell",
|
||||||
|
"top_k": 50,
|
||||||
|
"filters": {"file_id": [query_winterfell_docs[0]["meta"]["file_id"]]},
|
||||||
|
"all_terms_must_match": False,
|
||||||
|
"scale_score": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
],
|
||||||
|
status=200,
|
||||||
|
body=query_winterfell_filtered_response,
|
||||||
|
)
|
||||||
|
|
||||||
|
docs = ds.query("winterfell", top_k=50)
|
||||||
|
assert docs is not None
|
||||||
|
assert len(docs) > 0
|
||||||
|
|
||||||
|
first_doc = docs[0]
|
||||||
|
filtered_docs = ds.query("winterfell", top_k=50, filters={"file_id": [first_doc.meta["file_id"]]})
|
||||||
|
assert len(filtered_docs) > 0
|
||||||
|
assert len(filtered_docs) < len(docs)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"body, expected_count",
|
||||||
|
[
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"evaluation_set_id": str(uuid4()),
|
||||||
|
"name": DC_TEST_INDEX,
|
||||||
|
"created_at": "2022-03-22T13:40:27.535Z",
|
||||||
|
"matched_labels": 2,
|
||||||
|
"total_labels": 10,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"has_more": False,
|
||||||
|
"total": 1,
|
||||||
|
},
|
||||||
|
10,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
{
|
||||||
|
"data": [
|
||||||
|
{
|
||||||
|
"evaluation_set_id": str(uuid4()),
|
||||||
|
"name": DC_TEST_INDEX,
|
||||||
|
"created_at": "2022-03-22T13:40:27.535Z",
|
||||||
|
"matched_labels": 0,
|
||||||
|
"total_labels": 0,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"has_more": False,
|
||||||
|
"total": 1,
|
||||||
|
},
|
||||||
|
0,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_count_of_labels_for_evaluation_set(self, ds, body: dict, expected_count: int):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
|
||||||
|
status=200,
|
||||||
|
body=json.dumps(body),
|
||||||
|
)
|
||||||
|
|
||||||
|
count = ds.get_label_count(index=DC_TEST_INDEX)
|
||||||
|
assert count == expected_count
|
||||||
|
|
||||||
|
def test_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(self, ds):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
|
||||||
|
status=200,
|
||||||
|
body=json.dumps({"data": [], "has_more": False, "total": 0}),
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
|
||||||
|
ds.get_label_count(index=DC_TEST_INDEX)
|
||||||
|
|
||||||
|
def test_lists_evaluation_sets(self, ds):
|
||||||
|
response_evaluation_set = {
|
||||||
|
"evaluation_set_id": str(uuid4()),
|
||||||
|
"name": DC_TEST_INDEX,
|
||||||
|
"created_at": "2022-03-22T13:40:27.535Z",
|
||||||
|
"matched_labels": 2,
|
||||||
|
"total_labels": 10,
|
||||||
|
}
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
|
||||||
|
status=200,
|
||||||
|
body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}),
|
||||||
|
)
|
||||||
|
|
||||||
|
evaluation_sets = ds.get_evaluation_sets()
|
||||||
|
assert evaluation_sets == [response_evaluation_set]
|
||||||
|
|
||||||
|
def test_fetches_labels_for_evaluation_set(self, ds):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
|
||||||
|
status=200,
|
||||||
|
body=json.dumps(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
|
||||||
|
"query": "What is berlin?",
|
||||||
|
"answer": "biggest city in germany",
|
||||||
|
"answer_start": 0,
|
||||||
|
"answer_end": 0,
|
||||||
|
"meta": {},
|
||||||
|
"context": "Berlin is the biggest city in germany.",
|
||||||
|
"external_file_name": "string",
|
||||||
|
"file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
|
||||||
|
"state": "Label matching status",
|
||||||
|
"candidates": "Candidates that were found in the label <-> file matching",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
labels = ds.get_all_labels(index=DC_TEST_INDEX)
|
||||||
|
assert labels == [
|
||||||
|
Label(
|
||||||
|
query="What is berlin?",
|
||||||
|
document=Document(content="Berlin is the biggest city in germany."),
|
||||||
|
is_correct_answer=True,
|
||||||
|
is_correct_document=True,
|
||||||
|
origin="user-feedback",
|
||||||
|
answer=Answer("biggest city in germany"),
|
||||||
|
id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
|
||||||
|
pipeline_id=None,
|
||||||
|
created_at=None,
|
||||||
|
updated_at=None,
|
||||||
|
meta={},
|
||||||
|
filters={},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
def test_fetches_labels_for_evaluation_set_raises_deepsetclouderror_when_nothing_found(self, ds):
|
||||||
|
responses.add(
|
||||||
|
method=responses.GET,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
|
||||||
|
status=404,
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
|
||||||
|
ds.get_all_labels(index=DC_TEST_INDEX)
|
||||||
|
|
||||||
|
def test_query_by_embedding(self, ds):
|
||||||
|
query_emb = np.random.randn(768)
|
||||||
|
|
||||||
|
responses.add(
|
||||||
|
method=responses.POST,
|
||||||
|
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
||||||
|
match=[
|
||||||
|
matchers.json_params_matcher(
|
||||||
|
{"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True}
|
||||||
|
)
|
||||||
|
],
|
||||||
|
json=[],
|
||||||
|
status=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
emb_docs = ds.query_by_embedding(query_emb)
|
||||||
|
assert len(emb_docs) == 0
|
||||||
|
|
||||||
|
def test_get_all_docs_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.get_all_documents() == []
|
||||||
|
|
||||||
|
def test_get_all_docs_generator_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert list(document_store.get_all_documents_generator()) == []
|
||||||
|
|
||||||
|
def test_get_doc_by_id_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.get_document_by_id(id="some id") == None
|
||||||
|
|
||||||
|
def test_get_docs_by_id_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.get_documents_by_id(ids=["some id"]) == []
|
||||||
|
|
||||||
|
def test_get_doc_count_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.get_document_count() == 0
|
||||||
|
|
||||||
|
def test_query_by_emb_without_index(self):
|
||||||
|
query_emb = np.random.randn(768)
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.query_by_embedding(query_emb=query_emb) == []
|
||||||
|
|
||||||
|
def test_query_without_index(self):
|
||||||
|
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
|
||||||
|
assert document_store.query(query="some query") == []
|
@ -1,5 +1,4 @@
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import logging
|
|
||||||
import math
|
import math
|
||||||
import sys
|
import sys
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
@ -7,27 +6,12 @@ from uuid import uuid4
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
import json
|
|
||||||
import responses
|
|
||||||
from responses import matchers
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
from elasticsearch import Elasticsearch
|
|
||||||
from elasticsearch.exceptions import RequestError
|
|
||||||
|
|
||||||
from ..conftest import (
|
|
||||||
deepset_cloud_fixture,
|
from ..conftest import get_document_store, ensure_ids_are_correct_uuids
|
||||||
get_document_store,
|
|
||||||
ensure_ids_are_correct_uuids,
|
|
||||||
MOCK_DC,
|
|
||||||
DC_API_ENDPOINT,
|
|
||||||
DC_API_KEY,
|
|
||||||
DC_TEST_INDEX,
|
|
||||||
SAMPLES_PATH,
|
|
||||||
)
|
|
||||||
from haystack.document_stores import (
|
from haystack.document_stores import (
|
||||||
WeaviateDocumentStore,
|
WeaviateDocumentStore,
|
||||||
DeepsetCloudDocumentStore,
|
|
||||||
InMemoryDocumentStore,
|
|
||||||
MilvusDocumentStore,
|
MilvusDocumentStore,
|
||||||
FAISSDocumentStore,
|
FAISSDocumentStore,
|
||||||
ElasticsearchDocumentStore,
|
ElasticsearchDocumentStore,
|
||||||
@ -40,7 +24,6 @@ from haystack.errors import DuplicateDocumentError
|
|||||||
from haystack.schema import Document, Label, Answer, Span
|
from haystack.schema import Document, Label, Answer, Span
|
||||||
from haystack.nodes import EmbeddingRetriever, PreProcessor
|
from haystack.nodes import EmbeddingRetriever, PreProcessor
|
||||||
from haystack.pipelines import DocumentSearchPipeline
|
from haystack.pipelines import DocumentSearchPipeline
|
||||||
from haystack.utils import DeepsetCloudError
|
|
||||||
|
|
||||||
|
|
||||||
DOCUMENTS = [
|
DOCUMENTS = [
|
||||||
@ -1286,432 +1269,6 @@ def test_custom_headers(document_store_with_docs: BaseDocumentStore):
|
|||||||
assert len(documents) > 0
|
assert len(documents) > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_init_with_dot_product():
|
|
||||||
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)
|
|
||||||
assert document_store.return_embedding == False
|
|
||||||
assert document_store.similarity == "dot_product"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_init_with_cosine():
|
|
||||||
document_store = DeepsetCloudDocumentStore(
|
|
||||||
api_endpoint=DC_API_ENDPOINT,
|
|
||||||
api_key=DC_API_KEY,
|
|
||||||
index=DC_TEST_INDEX,
|
|
||||||
similarity="cosine",
|
|
||||||
return_embedding=True,
|
|
||||||
)
|
|
||||||
assert document_store.return_embedding == True
|
|
||||||
assert document_store.similarity == "cosine"
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_invalid_token():
|
|
||||||
if MOCK_DC:
|
|
||||||
responses.add(
|
|
||||||
method=responses.GET,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
|
|
||||||
match=[matchers.header_matcher({"authorization": "Bearer invalid_token"})],
|
|
||||||
body="Internal Server Error",
|
|
||||||
status=500,
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(
|
|
||||||
DeepsetCloudError,
|
|
||||||
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}/workspaces/default/pipelines failed: HTTP 500 - Internal Server Error",
|
|
||||||
):
|
|
||||||
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key="invalid_token", index=DC_TEST_INDEX)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_invalid_api_endpoint():
|
|
||||||
if MOCK_DC:
|
|
||||||
responses.add(
|
|
||||||
method=responses.GET, url=f"{DC_API_ENDPOINT}00/workspaces/default/pipelines", body="Not Found", status=404
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(
|
|
||||||
DeepsetCloudError,
|
|
||||||
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}00/workspaces/default/pipelines failed: "
|
|
||||||
f"HTTP 404 - Not Found\nNot Found",
|
|
||||||
):
|
|
||||||
DeepsetCloudDocumentStore(api_endpoint=f"{DC_API_ENDPOINT}00", api_key=DC_API_KEY, index=DC_TEST_INDEX)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_invalid_index(caplog):
|
|
||||||
if MOCK_DC:
|
|
||||||
responses.add(
|
|
||||||
method=responses.GET,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/invalid_index",
|
|
||||||
body="Not Found",
|
|
||||||
status=404,
|
|
||||||
)
|
|
||||||
|
|
||||||
with caplog.at_level(logging.INFO):
|
|
||||||
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index="invalid_index")
|
|
||||||
assert (
|
|
||||||
"You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud."
|
|
||||||
in caplog.text
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_documents(deepset_cloud_document_store):
|
|
||||||
if MOCK_DC:
|
|
||||||
with open(SAMPLES_PATH / "dc" / "documents-stream.response", "r") as f:
|
|
||||||
documents_stream_response = f.read()
|
|
||||||
docs = [json.loads(l) for l in documents_stream_response.splitlines()]
|
|
||||||
filtered_docs = [doc for doc in docs if doc["meta"]["file_id"] == docs[0]["meta"]["file_id"]]
|
|
||||||
documents_stream_filtered_response = "\n".join([json.dumps(d) for d in filtered_docs])
|
|
||||||
|
|
||||||
responses.add(
|
|
||||||
method=responses.POST,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
|
|
||||||
body=documents_stream_response,
|
|
||||||
status=200,
|
|
||||||
)
|
|
||||||
|
|
||||||
responses.add(
|
|
||||||
method=responses.POST,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
|
|
||||||
match=[
|
|
||||||
matchers.json_params_matcher(
|
|
||||||
{"filters": {"file_id": [docs[0]["meta"]["file_id"]]}, "return_embedding": False}
|
|
||||||
)
|
|
||||||
],
|
|
||||||
body=documents_stream_filtered_response,
|
|
||||||
status=200,
|
|
||||||
)
|
|
||||||
|
|
||||||
for doc in filtered_docs:
|
|
||||||
responses.add(
|
|
||||||
method=responses.GET,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents/{doc['id']}",
|
|
||||||
json=doc,
|
|
||||||
status=200,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
responses.add_passthru(DC_API_ENDPOINT)
|
|
||||||
|
|
||||||
docs = deepset_cloud_document_store.get_all_documents()
|
|
||||||
assert len(docs) > 1
|
|
||||||
assert isinstance(docs[0], Document)
|
|
||||||
|
|
||||||
first_doc = next(deepset_cloud_document_store.get_all_documents_generator())
|
|
||||||
assert isinstance(first_doc, Document)
|
|
||||||
assert first_doc.meta["file_id"] is not None
|
|
||||||
|
|
||||||
filtered_docs = deepset_cloud_document_store.get_all_documents(filters={"file_id": [first_doc.meta["file_id"]]})
|
|
||||||
assert len(filtered_docs) > 0
|
|
||||||
assert len(filtered_docs) < len(docs)
|
|
||||||
|
|
||||||
ids = [doc.id for doc in filtered_docs]
|
|
||||||
single_doc_by_id = deepset_cloud_document_store.get_document_by_id(ids[0])
|
|
||||||
assert single_doc_by_id is not None
|
|
||||||
assert single_doc_by_id.meta["file_id"] == first_doc.meta["file_id"]
|
|
||||||
|
|
||||||
docs_by_id = deepset_cloud_document_store.get_documents_by_id(ids)
|
|
||||||
assert len(docs_by_id) == len(filtered_docs)
|
|
||||||
for doc in docs_by_id:
|
|
||||||
assert doc.meta["file_id"] == first_doc.meta["file_id"]
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
|
|
||||||
def test_DeepsetCloudDocumentStore_query(deepset_cloud_document_store):
|
|
||||||
if MOCK_DC:
|
|
||||||
with open(SAMPLES_PATH / "dc" / "query_winterfell.response", "r") as f:
|
|
||||||
query_winterfell_response = f.read()
|
|
||||||
query_winterfell_docs = json.loads(query_winterfell_response)
|
|
||||||
query_winterfell_filtered_docs = [
|
|
||||||
doc
|
|
||||||
for doc in query_winterfell_docs
|
|
||||||
if doc["meta"]["file_id"] == query_winterfell_docs[0]["meta"]["file_id"]
|
|
||||||
]
|
|
||||||
query_winterfell_filtered_response = json.dumps(query_winterfell_filtered_docs)
|
|
||||||
|
|
||||||
responses.add(
|
|
||||||
method=responses.POST,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
|
||||||
match=[
|
|
||||||
matchers.json_params_matcher(
|
|
||||||
{"query": "winterfell", "top_k": 50, "all_terms_must_match": False, "scale_score": True}
|
|
||||||
)
|
|
||||||
],
|
|
||||||
status=200,
|
|
||||||
body=query_winterfell_response,
|
|
||||||
)
|
|
||||||
|
|
||||||
responses.add(
|
|
||||||
method=responses.POST,
|
|
||||||
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
|
|
||||||
match=[
|
|
||||||
matchers.json_params_matcher(
|
|
||||||
{
|
|
||||||
"query": "winterfell",
|
|
||||||
"top_k": 50,
|
|
||||||
"filters": {"file_id": [query_winterfell_docs[0]["meta"]["file_id"]]},
|
|
||||||
"all_terms_must_match": False,
|
|
||||||
"scale_score": True,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
],
|
|
||||||
status=200,
|
|
||||||
body=query_winterfell_filtered_response,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
responses.add_passthru(DC_API_ENDPOINT)
|
|
||||||
|
|
||||||
docs = deepset_cloud_document_store.query("winterfell", top_k=50)
|
|
||||||
assert docs is not None
|
|
||||||
assert len(docs) > 0
|
|
||||||
|
|
||||||
first_doc = docs[0]
|
|
||||||
filtered_docs = deepset_cloud_document_store.query(
|
|
||||||
"winterfell", top_k=50, filters={"file_id": [first_doc.meta["file_id"]]}
|
|
||||||
)
|
|
||||||
assert len(filtered_docs) > 0
|
|
||||||
assert len(filtered_docs) < len(docs)
|
|
||||||
|
|
||||||
|
|
||||||
def _single_eval_set_listing(matched_labels: int, total_labels: int) -> dict:
    """Build a one-entry evaluation-set listing payload as the dC API returns it."""
    return {
        "data": [
            {
                "evaluation_set_id": str(uuid4()),
                "name": DC_TEST_INDEX,
                "created_at": "2022-03-22T13:40:27.535Z",
                "matched_labels": matched_labels,
                "total_labels": total_labels,
            }
        ],
        "has_more": False,
        "total": 1,
    }


@pytest.mark.parametrize(
    "body, expected_count",
    [
        (_single_eval_set_listing(matched_labels=2, total_labels=10), 10),
        (_single_eval_set_listing(matched_labels=0, total_labels=0), 0),
    ],
)
@responses.activate
def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set(
    deepset_cloud_document_store, body: dict, expected_count: int
):
    """get_label_count must report the evaluation set's `total_labels` field."""
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps(body),
        )

    assert deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX) == expected_count
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(
    deepset_cloud_document_store,
):
    """An empty evaluation-set listing must surface as a DeepsetCloudError."""
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        empty_listing = {"data": [], "has_more": False, "total": 0}
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps(empty_listing),
        )

    with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
        deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
def test_DeepsetCloudDocumentStore_lists_evaluation_sets(deepset_cloud_document_store):
    """get_evaluation_sets returns evaluation sets exactly as the API lists them."""
    evaluation_set = {
        "evaluation_set_id": str(uuid4()),
        "name": DC_TEST_INDEX,
        "created_at": "2022-03-22T13:40:27.535Z",
        "matched_labels": 2,
        "total_labels": 10,
    }
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        listing = {"data": [evaluation_set], "has_more": False, "total": 1}
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps(listing),
        )

    assert deepset_cloud_document_store.get_evaluation_sets() == [evaluation_set]
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_cloud_document_store):
    """Labels delivered by the dC API are converted into Haystack Label objects."""
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        # Raw label entry in the shape the evaluation-set endpoint emits it.
        api_label = {
            "label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
            "query": "What is berlin?",
            "answer": "biggest city in germany",
            "answer_start": 0,
            "answer_end": 0,
            "meta": {},
            "context": "Berlin is the biggest city in germany.",
            "external_file_name": "string",
            "file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
            "state": "Label matching status",
            "candidates": "Candidates that were found in the label <-> file matching",
        }
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
            status=200,
            body=json.dumps([api_label]),
        )

    expected_label = Label(
        query="What is berlin?",
        document=Document(content="Berlin is the biggest city in germany."),
        is_correct_answer=True,
        is_correct_document=True,
        origin="user-feedback",
        answer=Answer("biggest city in germany"),
        id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
        pipeline_id=None,
        created_at=None,
        updated_at=None,
        meta={},
        filters={},
    )
    assert deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX) == [expected_label]
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set_raises_deepsetclouderror_when_nothing_found(
    deepset_cloud_document_store,
):
    """A 404 from the evaluation-set endpoint must surface as a DeepsetCloudError."""
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
            status=404,
        )

    with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
        deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX)
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_store):
    """Embedding queries POST the expected payload to the documents-query endpoint."""
    query_emb = np.random.randn(768)
    if not MOCK_DC:
        responses.add_passthru(DC_API_ENDPOINT)
    else:
        expected_request = {
            "query_emb": query_emb.tolist(),
            "top_k": 10,
            "return_embedding": False,
            "scale_score": True,
        }
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            match=[matchers.json_params_matcher(expected_request)],
            json=[],
            status=200,
        )

    # The mocked endpoint returns an empty result set.
    assert deepset_cloud_document_store.query_by_embedding(query_emb) == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_all_docs_without_index():
    """Without an index configured, fetching all documents yields an empty list."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    assert store.get_all_documents() == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_all_docs_generator_without_index():
    """Without an index configured, the document generator is immediately exhausted."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    assert list(store.get_all_documents_generator()) == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_doc_by_id_without_index():
    """Without an index configured, fetching a single document returns None."""
    document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    # `is None` rather than `== None`: identity comparison is the correct idiom (PEP 8, E711).
    assert document_store.get_document_by_id(id="some id") is None
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_docs_by_id_without_index():
    """Without an index configured, a bulk lookup by ids yields an empty list."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    assert store.get_documents_by_id(ids=["some id"]) == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_doc_count_without_index():
    """Without an index configured, the document count is zero."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    assert store.get_document_count() == 0
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_query_by_emb_without_index():
    """Without an index configured, embedding queries return no documents."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    embedding = np.random.randn(768)
    assert store.query_by_embedding(query_emb=embedding) == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_query_without_index():
    """Without an index configured, text queries return no documents."""
    store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
    assert store.query(query="some query") == []
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
|
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
|
||||||
def test_elasticsearch_brownfield_support(document_store_with_docs):
|
def test_elasticsearch_brownfield_support(document_store_with_docs):
|
||||||
new_document_store = InMemoryDocumentStore()
|
new_document_store = InMemoryDocumentStore()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user