DC SDK: Add possibility to upload evaluation sets to DC (#2610)

* Add possibility to upload evaluation sets to DC

* fix test_eval sas comparisons

* quickwin docstring feedback changes

* Add hint about annotation tool and mark optional and required columns

* minor changes to docstrings
tstadel 2022-05-31 17:08:19 +02:00 committed by GitHub
parent fc25adf959
commit 0efad96e08
4 changed files with 71 additions and 9 deletions


@@ -795,11 +795,11 @@ class EvaluationSetClient:
:param evaluation_set: name of the evaluation set for which labels should be fetched
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: list of Label
"""
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
evaluation_set_response = self.get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
@@ -831,16 +831,16 @@ class EvaluationSetClient:
Counts labels for a given evaluation set in deepset Cloud.
:param evaluation_set: Optional evaluation set in deepset Cloud
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) is used.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: Number of labels for the given (or default) evaluation set
"""
if not evaluation_set:
evaluation_set = self.evaluation_set
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
evaluation_set_response = self.get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
@@ -851,7 +851,7 @@ class EvaluationSetClient:
Searches for all evaluation set names in the given workspace in deepset Cloud.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: List of dictionaries that represent deepset Cloud evaluation sets.
These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
@@ -865,9 +865,51 @@ class EvaluationSetClient:
evaluation_set_url = f"{url}/evaluation_sets"
return self.client.get_with_auto_paging(url=evaluation_set_url)
def _get_evaluation_set(
def upload_evaluation_set(self, file_path: Path, workspace: Optional[str] = None):
"""
Uploads an evaluation set.
The name of the file you upload becomes the name of the evaluation set in deepset Cloud.
When using the Haystack annotation tool, make sure to choose CSV as the export format. The resulting file matches the expected format.
Currently, deepset Cloud only supports CSV files (with "," as the delimiter) containing the following columns:
- question (or query): the labelled question or query (required)
- text: the answer to the question or relevant text to the query (required)
- context: the words surrounding the text (should be more than 100 characters) (optional)
- file_name: the name of the file within the workspace that contains the text (optional)
- answer_start: the character position within the file that marks the start of the text (optional)
- answer_end: the character position within the file that marks the end of the text (optional)
:param file_path: Path to the evaluation set file to be uploaded.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
"""
workspace_url = self._build_workspace_url(workspace)
target_url = f"{workspace_url}/evaluation_sets/import"
try:
mime_type = guess_type(str(file_path))[0]
with open(file_path, "rb") as file:
self.client.post(url=target_url, files={"file": (file_path.name, file, mime_type)})
except Exception as e:
logger.exception(f"Error uploading evaluation set file {file_path}")
logger.info(
f"Successfully uploaded evaluation set file {file_path}. You can access it now under evaluation set '{file_path.name}'."
)
def get_evaluation_set(
self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""
Returns information about the evaluation set.
:param evaluation_set: Name of the evaluation set in deepset Cloud.
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) is used.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) is used.
:return: Dictionary that represents the evaluation set in deepset Cloud.
It contains ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
"""
url = self._build_workspace_url(workspace=workspace)
evaluation_set_url = f"{url}/evaluation_sets"
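
For orientation, here is a minimal usage sketch of the new upload and lookup methods, mirroring the client calls exercised in the test below. The API endpoint, API key, and file name are placeholder assumptions, not values from this commit.

from pathlib import Path

from haystack.utils import DeepsetCloud

# A tiny CSV with only the required columns (question/query and text); the optional
# columns (context, file_name, answer_start, answer_end) are left out.
eval_file = Path("my_eval_set.csv")
eval_file.write_text(
    'question,text\n'
    '"What are Primitives?","These are classes that carry data through the system."\n'
)

# Endpoint and API key are placeholders; use your own deepset Cloud credentials.
client = DeepsetCloud.get_evaluation_set_client(
    api_endpoint="https://api.cloud.deepset.ai/api/v1",
    api_key="<YOUR_DC_API_KEY>",
)

# Upload the file; its name becomes the evaluation set name in deepset Cloud.
client.upload_evaluation_set(file_path=eval_file)

# Fetch metadata for the uploaded evaluation set by name.
print(client.get_evaluation_set(evaluation_set="my_eval_set.csv"))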


@@ -894,3 +894,21 @@ def test_delete_eval_run():
runs = client.get_eval_runs()
assert len(runs) == 0
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_upload_eval_set(caplog):
if MOCK_DC:
responses.add(
method=responses.POST,
url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/import",
json={"evaluation_set_id": "c2d06025-2c00-43b5-8f73-b81b12e63afc"},
status=200,
)
client = DeepsetCloud.get_evaluation_set_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
with caplog.at_level(logging.INFO):
client.upload_evaluation_set(file_path=SAMPLES_PATH / "dc/matching_test_1.csv")
assert f"Successfully uploaded evaluation set file" in caplog.text
assert f"You can access it now under evaluation set 'matching_test_1.csv'." in caplog.text


@@ -654,7 +654,7 @@ def test_extractive_qa_eval_answer_scope(reader, retriever_with_docs):
assert metrics["Retriever"]["ndcg"] == 0.5
assert metrics["Reader"]["exact_match"] == 0.5
assert metrics["Reader"]["f1"] == 0.5
assert metrics["Reader"]["sas"] == 0.5
assert metrics["Reader"]["sas"] == pytest.approx(0.5)
metrics = eval_result.calculate_metrics(answer_scope="document_id_and_context")
@@ -666,7 +666,7 @@ def test_extractive_qa_eval_answer_scope(reader, retriever_with_docs):
assert metrics["Retriever"]["ndcg"] == 0.5
assert metrics["Reader"]["exact_match"] == 0.5
assert metrics["Reader"]["f1"] == 0.5
assert metrics["Reader"]["sas"] == 0.5
assert metrics["Reader"]["sas"] == pytest.approx(0.5)
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
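
The switch from exact equality to pytest.approx above is there because SAS scores are produced by a floating-point model and can drift by tiny amounts across library versions and hardware. A standalone sketch of the comparison semantics, unrelated to the test data above:

import pytest

# pytest.approx compares numbers within a tolerance (relative 1e-6 by default),
# so small floating-point drift no longer breaks the assertion.
assert 0.1 + 0.2 == pytest.approx(0.3)    # passes, although 0.1 + 0.2 != 0.3 exactly
assert 0.5000004 == pytest.approx(0.5)    # within the default tolerance
assert 0.5001 != pytest.approx(0.5)       # outside the default tolerance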


@@ -0,0 +1,2 @@
query,text,context,file_name,answer_start,answer_end
"What are Primitives?","These are classes that carry data through the system.","# Primitives\n\nIn Haystack, there are a handful of core classes that are regularly used in many different places.\nThese are classes that carry data through the system.\nUsers will likely interact with these as either the input or output of their pipeline.\n\n## Document\n\nThe Document class contains all the information regarding the contents of a document,\nincluding its id and metadata.\nIt may also contain information created in the pipeline including the confidence ","sample_pdf_1.pdf",113,166