DC SDK: Add possibility to upload evaluation sets to DC (#2610)

* Add possibility to upload evaluation sets to DC

* fix test_eval sas comparisons

* quickwin docstring feedback changes

* Add hint about annotation tool and mark optional and required columns

* minor changes to docstrings
tstadel 2022-05-31 17:08:19 +02:00 committed by GitHub
parent fc25adf959
commit 0efad96e08
4 changed files with 71 additions and 9 deletions


@@ -795,11 +795,11 @@ class EvaluationSetClient:
:param evaluation_set: name of the evaluation set for which labels should be fetched
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: list of Label
"""
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
evaluation_set_response = self.get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
@@ -831,16 +831,16 @@ class EvaluationSetClient:
Counts labels for a given evaluation set in deepset Cloud.
:param evaluation_set: Optional evaluation set in deepset Cloud
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) is used.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: Number of labels for the given (or default) evaluation set
"""
if not evaluation_set:
evaluation_set = self.evaluation_set
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
evaluation_set_response = self.get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
@@ -851,7 +851,7 @@ class EvaluationSetClient:
Searches for all evaluation set names in the given workspace in deepset Cloud.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: List of dictionaries that represent deepset Cloud evaluation sets.
These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
@@ -865,9 +865,51 @@ class EvaluationSetClient:
evaluation_set_url = f"{url}/evaluation_sets"
return self.client.get_with_auto_paging(url=evaluation_set_url)
def _get_evaluation_set(
def upload_evaluation_set(self, file_path: Path, workspace: Optional[str] = None):
"""
Uploads an evaluation set.
The name of the file you upload becomes the name of the evaluation set in deepset Cloud.
When using the Haystack annotation tool, make sure to choose CSV as the export format. The resulting file matches the expected format.
Currently, deepset Cloud only supports CSV files (with "," as the delimiter) containing the following columns:
- question (or query): the labelled question or query (required)
- text: the answer to the question or relevant text to the query (required)
- context: the words surrounding the text (should be more than 100 characters) (optional)
- file_name: the name of the file within the workspace that contains the text (optional)
- answer_start: the character position within the file that marks the start of the text (optional)
- answer_end: the character position within the file that marks the end of the text (optional)
:param file_path: Path to the evaluation set file to be uploaded.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
"""
workspace_url = self._build_workspace_url(workspace)
target_url = f"{workspace_url}/evaluation_sets/import"
try:
mime_type = guess_type(str(file_path))[0]
with open(file_path, "rb") as file:
self.client.post(url=target_url, files={"file": (file_path.name, file, mime_type)})
except Exception as e:
logger.exception(f"Error uploading evaluation set file {file_path}")
logger.info(
f"Successfully uploaded evaluation set file {file_path}. You can access it now under evaluation set '{file_path.name}'."
)
def get_evaluation_set(
self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Returns information about the evaluation set.
:param evaluation_set: Name of the evaluation set in deepset Cloud.
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) is used.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: Dictionary that represents the evaluation set in deepset Cloud.
It contains ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
"""
url = self._build_workspace_url(workspace=workspace)
evaluation_set_url = f"{url}/evaluation_sets"
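
For orientation, here is a minimal usage sketch of the new upload and lookup methods, mirroring the client calls exercised in the test below. The API endpoint, API key, and file name are placeholder assumptions, not values from this commit.

from pathlib import Path

from haystack.utils import DeepsetCloud

# A tiny CSV with only the required columns (question/query and text); the optional
# columns (context, file_name, answer_start, answer_end) are left out.
eval_file = Path("my_eval_set.csv")
eval_file.write_text(
    'question,text\n'
    '"What are Primitives?","These are classes that carry data through the system."\n'
)

# Endpoint and API key are placeholders; use your own deepset Cloud credentials.
client = DeepsetCloud.get_evaluation_set_client(
    api_endpoint="https://api.cloud.deepset.ai/api/v1",
    api_key="<YOUR_DC_API_KEY>",
)

# Upload the file; its name becomes the evaluation set name in deepset Cloud.
client.upload_evaluation_set(file_path=eval_file)

# Fetch metadata for the uploaded evaluation set by name.
print(client.get_evaluation_set(evaluation_set="my_eval_set.csv"))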


@@ -894,3 +894,21 @@ def test_delete_eval_run():
runs = client.get_eval_runs()
assert len(runs) == 0
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_upload_eval_set(caplog):
if MOCK_DC:
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/import",
json={"evaluation_set_id": "c2d06025-2c00-43b5-8f73-b81b12e63afc"},
status=200,
)
client = DeepsetCloud.get_evaluation_set_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
with caplog.at_level(logging.INFO):
client.upload_evaluation_set(file_path=SAMPLES_PATH / "dc/matching_test_1.csv")
assert f"Successfully uploaded evaluation set file" in caplog.text
assert f"You can access it now under evaluation set 'matching_test_1.csv'." in caplog.text


@@ -654,7 +654,7 @@ def test_extractive_qa_eval_answer_scope(reader, retriever_with_docs):
assert metrics["Retriever"]["ndcg"] == 0.5
assert metrics["Reader"]["exact_match"] == 0.5
assert metrics["Reader"]["f1"] == 0.5
assert metrics["Reader"]["sas"] == 0.5
assert metrics["Reader"]["sas"] == pytest.approx(0.5)
metrics = eval_result.calculate_metrics(answer_scope="document_id_and_context")
@@ -666,7 +666,7 @@ def test_extractive_qa_eval_answer_scope(reader, retriever_with_docs):
assert metrics["Retriever"]["ndcg"] == 0.5
assert metrics["Reader"]["exact_match"] == 0.5
assert metrics["Reader"]["f1"] == 0.5
assert metrics["Reader"]["sas"] == 0.5
assert metrics["Reader"]["sas"] == pytest.approx(0.5)
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
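
The switch from exact equality to pytest.approx above is there because SAS scores are produced by a floating-point model and can drift by tiny amounts across library versions and hardware. A standalone sketch of the comparison semantics, unrelated to the test data above:

import pytest

# pytest.approx compares numbers within a tolerance (relative 1e-6 by default),
# so small floating-point drift no longer breaks the assertion.
assert 0.1 + 0.2 == pytest.approx(0.3)    # passes, although 0.1 + 0.2 != 0.3 exactly
assert 0.5000004 == pytest.approx(0.5)    # within the default tolerance
assert 0.5001 != pytest.approx(0.5)       # outside the default tolerance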


@@ -0,0 +1,2 @@
query,text,context,file_name,answer_start,answer_end
"What are Primitives?","These are classes that carry data through the system.","# Primitives\n\nIn Haystack, there are a handful of core classes that are regularly used in many different places.\nThese are classes that carry data through the system.\nUsers will likely interact with these as either the input or output of their pipeline.\n\n## Document\n\nThe Document class contains all the information regarding the contents of a document,\nincluding its id and metadata.\nIt may also contain information created in the pipeline including the confidence ","sample_pdf_1.pdf",113,166