refactor: move dC tests to their own module and job (#3529)

* move dC tests to their own module and job

* restore global var

* revert
This commit is contained in:
Massimiliano Pippi 2022-11-04 17:05:10 +01:00 committed by GitHub
parent 815017ad5b
commit 255072d8d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 451 additions and 456 deletions

View File

@ -224,6 +224,33 @@ jobs:
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
integration-tests-dc:
name: Integration / dC / ${{ matrix.os }}
needs:
- unit-tests
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest,macos-latest,windows-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: ./.github/actions/python_cache/
- name: Install Haystack
run: pip install -U .
- name: Run tests
run: |
pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_deepsetcloud.py
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
#
# TODO: the following steps need to be revisited
#

View File

@ -4,6 +4,9 @@ def pytest_addoption(parser):
action="store",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
)
# NOTE(review): `action="store_true"` together with `default=True` means this
# option can never evaluate to False -- passing `--mock-dc` is a no-op and
# mocking is always enabled, so any `if request.config.getoption("--mock-dc")`
# branch is effectively always taken. If opting out of mocking is intended,
# the default should be False (or the flag inverted, e.g. `--no-mock-dc` with
# `action="store_false"`). Confirm intent before changing test-suite behavior.
parser.addoption(
    "--mock-dc", action="store_true", default=True, help="Mock HTTP requests to dC while running tests"
)
def pytest_generate_tests(metafunc):

View File

@ -26,7 +26,6 @@ import requests
from haystack import Answer, BaseComponent
from haystack.document_stores import (
BaseDocumentStore,
DeepsetCloudDocumentStore,
InMemoryDocumentStore,
ElasticsearchDocumentStore,
WeaviateDocumentStore,
@ -86,11 +85,7 @@ from .mocks import pinecone as pinecone_mock
# To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
SQL_TYPE = "sqlite"
# SQL_TYPE = "postgres"
SAMPLES_PATH = Path(__file__).parent / "samples"
# to run tests against Deepset Cloud set MOCK_DC to False and set the following params
DC_API_ENDPOINT = "https://DC_API/v1"
DC_TEST_INDEX = "document_retrieval_1"
DC_API_KEY = "NO_KEY"
@ -603,12 +598,6 @@ def deepset_cloud_fixture():
responses.add_passthru(DC_API_ENDPOINT)
@pytest.fixture
@responses.activate
def deepset_cloud_document_store(deepset_cloud_fixture):
return DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)
@pytest.fixture
def rag_generator():
return RAGenerator(model_name_or_path="facebook/rag-token-nq", generator_type="token", max_length=20)

View File

@ -0,0 +1,419 @@
import logging
import json
from pathlib import Path
from uuid import uuid4
import pytest
import responses
import numpy as np
from responses import matchers
from haystack.document_stores import DeepsetCloudDocumentStore
from haystack.utils import DeepsetCloudError
from haystack.schema import Document, Label, Answer
DC_API_ENDPOINT = "https://dc.example.com/v1"
DC_TEST_INDEX = "document_retrieval_1"
DC_API_KEY = "NO_KEY"
SAMPLES_PATH = Path(__file__).parent.parent / "samples"
@pytest.fixture
def dc_api_mock(request):
    """
    Register the default dC API mocks and activate `responses`.

    This fixture contains responses activation, so either this one or ds() below must be
    passed to tests that require mocking.
    If `--mock-dc` was False, responses are never activated and it doesn't matter if the
    fixture is passed or not.
    """
    # NOTE(review): `responses.mock` below is entered unconditionally, so the docstring
    # claim that responses are "never activated" when --mock-dc is False looks stale --
    # with no registered responses a real HTTP call would be rejected. Also, the conftest
    # defines --mock-dc as store_true with default=True, so the condition is always True.
    # Confirm intent before relying on the unmocked path.
    if request.config.getoption("--mock-dc"):
        # Index status endpoint: report a fully indexed index for the test index.
        # Use the module-level `matchers` alias for consistency with every other
        # call site in this file (was `responses.matchers.header_matcher`).
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}",
            match=[matchers.header_matcher({"authorization": f"Bearer {DC_API_KEY}"})],
            json={"indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 31}},
            status=200,
        )
        # Pipelines listing endpoint: one deployed pipeline named after the test index.
        # `status=200` made explicit for consistency with the sibling registration
        # (200 is also the `responses` default, so behavior is unchanged).
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            match=[matchers.header_matcher({"authorization": f"Bearer {DC_API_KEY}"})],
            json={
                "data": [
                    {
                        "name": DC_TEST_INDEX,
                        "status": "DEPLOYED",
                        "indexing": {"status": "INDEXED", "pending_file_count": 0, "total_file_count": 31},
                    }
                ],
                "has_more": False,
                "total": 1,
            },
            status=200,
        )
    # activate the default mock, same as using the @responses.activate everywhere
    with responses.mock as m:
        yield m
@pytest.mark.document_store
@pytest.mark.integration
@pytest.mark.usefixtures("dc_api_mock")
class TestDeepsetCloudDocumentStore:
    """Integration tests for DeepsetCloudDocumentStore, run against the dC API mocks
    registered by the class-level `dc_api_mock` fixture."""

    # Fixtures

    @pytest.fixture
    def ds(self):
        """
        We make this fixture depend on `dc_api_mock` so that passing the document store will
        activate the mocking and we spare one function parameter.
        """
        # NOTE(review): the signature does not actually request `dc_api_mock`; activation
        # comes from the class-level `usefixtures` marker -- the docstring may be stale.
        return DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)

    # Integration tests

    def test_init_with_dot_product(self, ds):
        # Constructor defaults: embeddings not returned, dot_product similarity.
        assert ds.return_embedding == False
        assert ds.similarity == "dot_product"

    def test_init_with_cosine(self):
        # Explicit constructor arguments must be honored.
        document_store = DeepsetCloudDocumentStore(
            api_endpoint=DC_API_ENDPOINT,
            api_key=DC_API_KEY,
            index=DC_TEST_INDEX,
            similarity="cosine",
            return_embedding=True,
        )
        assert document_store.return_embedding == True
        assert document_store.similarity == "cosine"

    def test_invalid_token(self):
        # An HTTP 500 from the pipelines endpoint surfaces as DeepsetCloudError at init time.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
            match=[matchers.header_matcher({"authorization": "Bearer invalid_token"})],
            body="Internal Server Error",
            status=500,
        )
        with pytest.raises(
            DeepsetCloudError,
            match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}/workspaces/default/pipelines failed: HTTP 500 - Internal Server Error",
        ):
            DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key="invalid_token", index=DC_TEST_INDEX)

    def test_invalid_api_endpoint(self):
        # A 404 on an unreachable endpoint also surfaces as DeepsetCloudError.
        responses.add(
            method=responses.GET, url=f"{DC_API_ENDPOINT}00/workspaces/default/pipelines", body="Not Found", status=404
        )
        with pytest.raises(
            DeepsetCloudError,
            match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}00/workspaces/default/pipelines failed: "
            f"HTTP 404 - Not Found\nNot Found",
        ):
            DeepsetCloudDocumentStore(api_endpoint=f"{DC_API_ENDPOINT}00", api_key=DC_API_KEY, index=DC_TEST_INDEX)

    def test_invalid_index(self, caplog):
        # A missing index must not raise -- it only logs an informational message.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/invalid_index",
            body="Not Found",
            status=404,
        )
        with caplog.at_level(logging.INFO):
            DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index="invalid_index")
            assert (
                "You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud."
                in caplog.text
            )

    def test_documents(self, ds):
        # Build mock payloads from the canned documents-stream sample, plus a variant
        # filtered down to the file_id of the first document.
        with open(SAMPLES_PATH / "dc" / "documents-stream.response", "r") as f:
            documents_stream_response = f.read()
        docs = [json.loads(l) for l in documents_stream_response.splitlines()]
        filtered_docs = [doc for doc in docs if doc["meta"]["file_id"] == docs[0]["meta"]["file_id"]]
        documents_stream_filtered_response = "\n".join([json.dumps(d) for d in filtered_docs])
        # Unfiltered stream request returns all sample documents.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
            body=documents_stream_response,
            status=200,
        )
        # Same endpoint with a file_id filter returns only the filtered subset.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
            match=[
                matchers.json_params_matcher(
                    {"filters": {"file_id": [docs[0]["meta"]["file_id"]]}, "return_embedding": False}
                )
            ],
            body=documents_stream_filtered_response,
            status=200,
        )
        # Per-document GET endpoint for each filtered document id.
        for doc in filtered_docs:
            responses.add(
                method=responses.GET,
                url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents/{doc['id']}",
                json=doc,
                status=200,
            )

        docs = ds.get_all_documents()
        assert len(docs) > 1
        assert isinstance(docs[0], Document)

        first_doc = next(ds.get_all_documents_generator())
        assert isinstance(first_doc, Document)
        assert first_doc.meta["file_id"] is not None

        filtered_docs = ds.get_all_documents(filters={"file_id": [first_doc.meta["file_id"]]})
        assert len(filtered_docs) > 0
        assert len(filtered_docs) < len(docs)

        ids = [doc.id for doc in filtered_docs]
        single_doc_by_id = ds.get_document_by_id(ids[0])
        assert single_doc_by_id is not None
        assert single_doc_by_id.meta["file_id"] == first_doc.meta["file_id"]

        docs_by_id = ds.get_documents_by_id(ids)
        assert len(docs_by_id) == len(filtered_docs)
        for doc in docs_by_id:
            assert doc.meta["file_id"] == first_doc.meta["file_id"]

    def test_query(self, ds):
        # Canned query response plus a variant filtered to the first doc's file_id.
        with open(SAMPLES_PATH / "dc" / "query_winterfell.response", "r") as f:
            query_winterfell_response = f.read()
        query_winterfell_docs = json.loads(query_winterfell_response)
        query_winterfell_filtered_docs = [
            doc
            for doc in query_winterfell_docs
            if doc["meta"]["file_id"] == query_winterfell_docs[0]["meta"]["file_id"]
        ]
        query_winterfell_filtered_response = json.dumps(query_winterfell_filtered_docs)
        # Unfiltered query request.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            match=[
                matchers.json_params_matcher(
                    {"query": "winterfell", "top_k": 50, "all_terms_must_match": False, "scale_score": True}
                )
            ],
            status=200,
            body=query_winterfell_response,
        )
        # Same query with a file_id filter.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            match=[
                matchers.json_params_matcher(
                    {
                        "query": "winterfell",
                        "top_k": 50,
                        "filters": {"file_id": [query_winterfell_docs[0]["meta"]["file_id"]]},
                        "all_terms_must_match": False,
                        "scale_score": True,
                    }
                )
            ],
            status=200,
            body=query_winterfell_filtered_response,
        )

        docs = ds.query("winterfell", top_k=50)
        assert docs is not None
        assert len(docs) > 0

        first_doc = docs[0]
        filtered_docs = ds.query("winterfell", top_k=50, filters={"file_id": [first_doc.meta["file_id"]]})
        assert len(filtered_docs) > 0
        assert len(filtered_docs) < len(docs)

    @pytest.mark.parametrize(
        "body, expected_count",
        [
            (
                {
                    "data": [
                        {
                            "evaluation_set_id": str(uuid4()),
                            "name": DC_TEST_INDEX,
                            "created_at": "2022-03-22T13:40:27.535Z",
                            "matched_labels": 2,
                            "total_labels": 10,
                        }
                    ],
                    "has_more": False,
                    "total": 1,
                },
                10,
            ),
            (
                {
                    "data": [
                        {
                            "evaluation_set_id": str(uuid4()),
                            "name": DC_TEST_INDEX,
                            "created_at": "2022-03-22T13:40:27.535Z",
                            "matched_labels": 0,
                            "total_labels": 0,
                        }
                    ],
                    "has_more": False,
                    "total": 1,
                },
                0,
            ),
        ],
    )
    def test_count_of_labels_for_evaluation_set(self, ds, body: dict, expected_count: int):
        # get_label_count must report `total_labels` (not `matched_labels`) of the
        # evaluation set whose name matches the index.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps(body),
        )
        count = ds.get_label_count(index=DC_TEST_INDEX)
        assert count == expected_count

    def test_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(self, ds):
        # An empty evaluation-set listing must raise instead of returning 0.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps({"data": [], "has_more": False, "total": 0}),
        )
        with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
            ds.get_label_count(index=DC_TEST_INDEX)

    def test_lists_evaluation_sets(self, ds):
        # get_evaluation_sets must return the raw `data` entries from the listing endpoint.
        response_evaluation_set = {
            "evaluation_set_id": str(uuid4()),
            "name": DC_TEST_INDEX,
            "created_at": "2022-03-22T13:40:27.535Z",
            "matched_labels": 2,
            "total_labels": 10,
        }
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
            status=200,
            body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}),
        )
        evaluation_sets = ds.get_evaluation_sets()
        assert evaluation_sets == [response_evaluation_set]

    def test_fetches_labels_for_evaluation_set(self, ds):
        # Raw dC label payloads must be converted into haystack Label objects.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
            status=200,
            body=json.dumps(
                [
                    {
                        "label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
                        "query": "What is berlin?",
                        "answer": "biggest city in germany",
                        "answer_start": 0,
                        "answer_end": 0,
                        "meta": {},
                        "context": "Berlin is the biggest city in germany.",
                        "external_file_name": "string",
                        "file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
                        "state": "Label matching status",
                        "candidates": "Candidates that were found in the label <-> file matching",
                    }
                ]
            ),
        )
        labels = ds.get_all_labels(index=DC_TEST_INDEX)
        assert labels == [
            Label(
                query="What is berlin?",
                document=Document(content="Berlin is the biggest city in germany."),
                is_correct_answer=True,
                is_correct_document=True,
                origin="user-feedback",
                answer=Answer("biggest city in germany"),
                id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
                pipeline_id=None,
                created_at=None,
                updated_at=None,
                meta={},
                filters={},
            )
        ]

    def test_fetches_labels_for_evaluation_set_raises_deepsetclouderror_when_nothing_found(self, ds):
        # A 404 on the evaluation-set endpoint must raise DeepsetCloudError.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
            status=404,
        )
        with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
            ds.get_all_labels(index=DC_TEST_INDEX)

    def test_query_by_embedding(self, ds):
        # Embedding query must POST the embedding as a plain list and accept an empty result.
        query_emb = np.random.randn(768)
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
            match=[
                matchers.json_params_matcher(
                    {"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True}
                )
            ],
            json=[],
            status=200,
        )
        emb_docs = ds.query_by_embedding(query_emb)
        assert len(emb_docs) == 0

    # With index=None the store is a no-op: every read returns an empty/None result
    # without hitting the API (no mocks registered below).

    def test_get_all_docs_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.get_all_documents() == []

    def test_get_all_docs_generator_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert list(document_store.get_all_documents_generator()) == []

    def test_get_doc_by_id_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.get_document_by_id(id="some id") == None

    def test_get_docs_by_id_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.get_documents_by_id(ids=["some id"]) == []

    def test_get_doc_count_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.get_document_count() == 0

    def test_query_by_emb_without_index(self):
        query_emb = np.random.randn(768)
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.query_by_embedding(query_emb=query_emb) == []

    def test_query_without_index(self):
        document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
        assert document_store.query(query="some query") == []

View File

@ -1,5 +1,4 @@
from copy import deepcopy
import logging
import math
import sys
from uuid import uuid4
@ -7,27 +6,12 @@ from uuid import uuid4
import numpy as np
import pandas as pd
import pytest
import json
import responses
from responses import matchers
from unittest.mock import Mock
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import RequestError
from ..conftest import (
deepset_cloud_fixture,
get_document_store,
ensure_ids_are_correct_uuids,
MOCK_DC,
DC_API_ENDPOINT,
DC_API_KEY,
DC_TEST_INDEX,
SAMPLES_PATH,
)
from ..conftest import get_document_store, ensure_ids_are_correct_uuids
from haystack.document_stores import (
WeaviateDocumentStore,
DeepsetCloudDocumentStore,
InMemoryDocumentStore,
MilvusDocumentStore,
FAISSDocumentStore,
ElasticsearchDocumentStore,
@ -40,7 +24,6 @@ from haystack.errors import DuplicateDocumentError
from haystack.schema import Document, Label, Answer, Span
from haystack.nodes import EmbeddingRetriever, PreProcessor
from haystack.pipelines import DocumentSearchPipeline
from haystack.utils import DeepsetCloudError
DOCUMENTS = [
@ -1286,432 +1269,6 @@ def test_custom_headers(document_store_with_docs: BaseDocumentStore):
assert len(documents) > 0
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_init_with_dot_product():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=DC_TEST_INDEX)
assert document_store.return_embedding == False
assert document_store.similarity == "dot_product"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_init_with_cosine():
document_store = DeepsetCloudDocumentStore(
api_endpoint=DC_API_ENDPOINT,
api_key=DC_API_KEY,
index=DC_TEST_INDEX,
similarity="cosine",
return_embedding=True,
)
assert document_store.return_embedding == True
assert document_store.similarity == "cosine"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_invalid_token():
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/pipelines",
match=[matchers.header_matcher({"authorization": "Bearer invalid_token"})],
body="Internal Server Error",
status=500,
)
with pytest.raises(
DeepsetCloudError,
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}/workspaces/default/pipelines failed: HTTP 500 - Internal Server Error",
):
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key="invalid_token", index=DC_TEST_INDEX)
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_invalid_api_endpoint():
if MOCK_DC:
responses.add(
method=responses.GET, url=f"{DC_API_ENDPOINT}00/workspaces/default/pipelines", body="Not Found", status=404
)
with pytest.raises(
DeepsetCloudError,
match=f"Could not connect to deepset Cloud:\nGET {DC_API_ENDPOINT}00/workspaces/default/pipelines failed: "
f"HTTP 404 - Not Found\nNot Found",
):
DeepsetCloudDocumentStore(api_endpoint=f"{DC_API_ENDPOINT}00", api_key=DC_API_KEY, index=DC_TEST_INDEX)
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_invalid_index(caplog):
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/invalid_index",
body="Not Found",
status=404,
)
with caplog.at_level(logging.INFO):
DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index="invalid_index")
assert (
"You are using a DeepsetCloudDocumentStore with an index that does not exist on deepset Cloud."
in caplog.text
)
@responses.activate
def test_DeepsetCloudDocumentStore_documents(deepset_cloud_document_store):
if MOCK_DC:
with open(SAMPLES_PATH / "dc" / "documents-stream.response", "r") as f:
documents_stream_response = f.read()
docs = [json.loads(l) for l in documents_stream_response.splitlines()]
filtered_docs = [doc for doc in docs if doc["meta"]["file_id"] == docs[0]["meta"]["file_id"]]
documents_stream_filtered_response = "\n".join([json.dumps(d) for d in filtered_docs])
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
body=documents_stream_response,
status=200,
)
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-stream",
match=[
matchers.json_params_matcher(
{"filters": {"file_id": [docs[0]["meta"]["file_id"]]}, "return_embedding": False}
)
],
body=documents_stream_filtered_response,
status=200,
)
for doc in filtered_docs:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents/{doc['id']}",
json=doc,
status=200,
)
else:
responses.add_passthru(DC_API_ENDPOINT)
docs = deepset_cloud_document_store.get_all_documents()
assert len(docs) > 1
assert isinstance(docs[0], Document)
first_doc = next(deepset_cloud_document_store.get_all_documents_generator())
assert isinstance(first_doc, Document)
assert first_doc.meta["file_id"] is not None
filtered_docs = deepset_cloud_document_store.get_all_documents(filters={"file_id": [first_doc.meta["file_id"]]})
assert len(filtered_docs) > 0
assert len(filtered_docs) < len(docs)
ids = [doc.id for doc in filtered_docs]
single_doc_by_id = deepset_cloud_document_store.get_document_by_id(ids[0])
assert single_doc_by_id is not None
assert single_doc_by_id.meta["file_id"] == first_doc.meta["file_id"]
docs_by_id = deepset_cloud_document_store.get_documents_by_id(ids)
assert len(docs_by_id) == len(filtered_docs)
for doc in docs_by_id:
assert doc.meta["file_id"] == first_doc.meta["file_id"]
@responses.activate
def test_DeepsetCloudDocumentStore_query(deepset_cloud_document_store):
if MOCK_DC:
with open(SAMPLES_PATH / "dc" / "query_winterfell.response", "r") as f:
query_winterfell_response = f.read()
query_winterfell_docs = json.loads(query_winterfell_response)
query_winterfell_filtered_docs = [
doc
for doc in query_winterfell_docs
if doc["meta"]["file_id"] == query_winterfell_docs[0]["meta"]["file_id"]
]
query_winterfell_filtered_response = json.dumps(query_winterfell_filtered_docs)
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
match=[
matchers.json_params_matcher(
{"query": "winterfell", "top_k": 50, "all_terms_must_match": False, "scale_score": True}
)
],
status=200,
body=query_winterfell_response,
)
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
match=[
matchers.json_params_matcher(
{
"query": "winterfell",
"top_k": 50,
"filters": {"file_id": [query_winterfell_docs[0]["meta"]["file_id"]]},
"all_terms_must_match": False,
"scale_score": True,
}
)
],
status=200,
body=query_winterfell_filtered_response,
)
else:
responses.add_passthru(DC_API_ENDPOINT)
docs = deepset_cloud_document_store.query("winterfell", top_k=50)
assert docs is not None
assert len(docs) > 0
first_doc = docs[0]
filtered_docs = deepset_cloud_document_store.query(
"winterfell", top_k=50, filters={"file_id": [first_doc.meta["file_id"]]}
)
assert len(filtered_docs) > 0
assert len(filtered_docs) < len(docs)
@pytest.mark.parametrize(
"body, expected_count",
[
(
{
"data": [
{
"evaluation_set_id": str(uuid4()),
"name": DC_TEST_INDEX,
"created_at": "2022-03-22T13:40:27.535Z",
"matched_labels": 2,
"total_labels": 10,
}
],
"has_more": False,
"total": 1,
},
10,
),
(
{
"data": [
{
"evaluation_set_id": str(uuid4()),
"name": DC_TEST_INDEX,
"created_at": "2022-03-22T13:40:27.535Z",
"matched_labels": 0,
"total_labels": 0,
}
],
"has_more": False,
"total": 1,
},
0,
),
],
)
@responses.activate
def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set(
deepset_cloud_document_store, body: dict, expected_count: int
):
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
status=200,
body=json.dumps(body),
)
else:
responses.add_passthru(DC_API_ENDPOINT)
count = deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
assert count == expected_count
@responses.activate
def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(
deepset_cloud_document_store,
):
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
status=200,
body=json.dumps({"data": [], "has_more": False, "total": 0}),
)
else:
responses.add_passthru(DC_API_ENDPOINT)
with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
@responses.activate
def test_DeepsetCloudDocumentStore_lists_evaluation_sets(deepset_cloud_document_store):
response_evaluation_set = {
"evaluation_set_id": str(uuid4()),
"name": DC_TEST_INDEX,
"created_at": "2022-03-22T13:40:27.535Z",
"matched_labels": 2,
"total_labels": 10,
}
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
status=200,
body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}),
)
else:
responses.add_passthru(DC_API_ENDPOINT)
evaluation_sets = deepset_cloud_document_store.get_evaluation_sets()
assert evaluation_sets == [response_evaluation_set]
@responses.activate
def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_cloud_document_store):
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
status=200,
body=json.dumps(
[
{
"label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
"query": "What is berlin?",
"answer": "biggest city in germany",
"answer_start": 0,
"answer_end": 0,
"meta": {},
"context": "Berlin is the biggest city in germany.",
"external_file_name": "string",
"file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
"state": "Label matching status",
"candidates": "Candidates that were found in the label <-> file matching",
}
]
),
)
else:
responses.add_passthru(DC_API_ENDPOINT)
labels = deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX)
assert labels == [
Label(
query="What is berlin?",
document=Document(content="Berlin is the biggest city in germany."),
is_correct_answer=True,
is_correct_document=True,
origin="user-feedback",
answer=Answer("biggest city in germany"),
id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
pipeline_id=None,
created_at=None,
updated_at=None,
meta={},
filters={},
)
]
@responses.activate
def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set_raises_deepsetclouderror_when_nothing_found(
deepset_cloud_document_store,
):
if MOCK_DC:
responses.add(
method=responses.GET,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{DC_TEST_INDEX}",
status=404,
)
else:
responses.add_passthru(DC_API_ENDPOINT)
with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX)
@responses.activate
def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_store):
query_emb = np.random.randn(768)
if MOCK_DC:
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/indexes/{DC_TEST_INDEX}/documents-query",
match=[
matchers.json_params_matcher(
{"query_emb": query_emb.tolist(), "top_k": 10, "return_embedding": False, "scale_score": True}
)
],
json=[],
status=200,
)
else:
responses.add_passthru(DC_API_ENDPOINT)
emb_docs = deepset_cloud_document_store.query_by_embedding(query_emb)
assert len(emb_docs) == 0
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_all_docs_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.get_all_documents() == []
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_all_docs_generator_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert list(document_store.get_all_documents_generator()) == []
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_doc_by_id_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.get_document_by_id(id="some id") == None
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_docs_by_id_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.get_documents_by_id(ids=["some id"]) == []
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_get_doc_count_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.get_document_count() == 0
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_query_by_emb_without_index():
query_emb = np.random.randn(768)
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.query_by_embedding(query_emb=query_emb) == []
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_DeepsetCloudDocumentStore_query_without_index():
document_store = DeepsetCloudDocumentStore(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY, index=None)
assert document_store.query(query="some query") == []
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_elasticsearch_brownfield_support(document_store_with_docs):
new_document_store = InMemoryDocumentStore()