Create eval runs on deepset Cloud (#2534)

* add EvaluationRunClient

* Update Documentation & Code Style

* temporarily resolve names to ids

* Update Documentation & Code Style

* add delete and update methods

* minor fixes

* add experiments facade

* dummy implement start_run()

* start eval runs added

* Update Documentation & Code Style

* fix merge

* switch to names on api level

* add create eval_run test

* Update Documentation & Code Style

* further tests added

* update docstrings

* add docstrings

* add missing tags param, fix docstrings

* refactor _get_evaluation_sets

* fix mypy

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
tstadel 2022-05-30 18:18:19 +02:00 committed by GitHub
parent 0395533a78
commit fc25adf959
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 1182 additions and 51 deletions

View File

@ -10,7 +10,7 @@ from haystack.utils.doc_store import (
stop_opensearch,
stop_service,
)
from haystack.utils.deepsetcloud import DeepsetCloud, DeepsetCloudError
from haystack.utils.deepsetcloud import DeepsetCloud, DeepsetCloudError, DeepsetCloudExperiments
from haystack.utils.export_utils import (
print_answers,
print_documents,

View File

@ -78,9 +78,10 @@ class DeepsetCloudClient:
A client to communicate with deepset Cloud.
:param api_key: Secret value of the API key.
If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
:param api_endpoint: The URL of the deepset Cloud API.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
"""
self.api_key = api_key or os.getenv("DEEPSET_CLOUD_API_KEY")
if self.api_key is None:
@ -233,6 +234,27 @@ class DeepsetCloudClient:
raise_on_error=raise_on_error,
)
def patch(
    self,
    url: str,
    json: dict = None,
    data: Any = None,
    query_params: dict = None,
    stream: bool = False,
    headers: dict = None,
    raise_on_error: bool = True,
):
    """
    Issues a PATCH request by delegating to `_execute_request`.

    Every argument is forwarded unchanged under the same keyword name.

    :param url: The URL to send the request to.
    :param json: Forwarded as the `json` argument of `_execute_request`.
    :param data: Forwarded as the `data` argument of `_execute_request`.
    :param query_params: Forwarded as the `query_params` argument of `_execute_request`.
    :param stream: Forwarded as the `stream` argument of `_execute_request`.
    :param headers: Forwarded as the `headers` argument of `_execute_request`.
    :param raise_on_error: Forwarded as the `raise_on_error` argument of `_execute_request`.
    :return: Whatever `_execute_request` returns.
    """
    forwarded = dict(
        url=url,
        query_params=query_params,
        json=json,
        data=data,
        stream=stream,
        headers=headers,
        raise_on_error=raise_on_error,
    )
    return self._execute_request(method="PATCH", **forwarded)
def _execute_auto_paging_request(
self,
method: Literal["GET", "POST", "PUT", "HEAD", "DELETE"],
@ -268,7 +290,7 @@ class DeepsetCloudClient:
def _execute_request(
self,
method: Literal["GET", "POST", "PUT", "HEAD", "DELETE"],
method: Literal["GET", "POST", "PUT", "HEAD", "DELETE", "PATCH"],
url: str,
json: dict = None,
data: Any = None,
@ -312,7 +334,7 @@ class IndexClient:
A client to communicate with deepset Cloud indexes.
:param client: deepset Cloud client
:param workspace: workspace in deepset Cloud
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param index: index in deepset Cloud workspace
"""
@ -425,8 +447,8 @@ class PipelineClient:
A client to communicate with deepset Cloud pipelines.
:param client: deepset Cloud client
:param workspace: workspace in deepset Cloud
:param pipeline_config_name: name of the pipeline_config in deepset Cloud workspace
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace.
"""
self.client = client
@ -436,6 +458,13 @@ class PipelineClient:
def get_pipeline_config(
self, workspace: Optional[str] = None, pipeline_config_name: Optional[str] = None, headers: dict = None
) -> dict:
"""
Gets the config from a pipeline on deepset Cloud.
:param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
"""
pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name)
pipeline_config_url = f"{pipeline_url}/json"
response = self.client.get(url=pipeline_config_url, headers=headers).json()
@ -444,6 +473,13 @@ class PipelineClient:
def get_pipeline_config_info(
self, workspace: Optional[str] = None, pipeline_config_name: Optional[str] = None, headers: dict = None
) -> Optional[dict]:
"""
Gets information about a pipeline on deepset Cloud.
:param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
"""
pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name)
response = self.client.get(url=pipeline_url, headers=headers, raise_on_error=False)
if response.status_code == 200:
@ -456,6 +492,29 @@ class PipelineClient:
)
def list_pipeline_configs(self, workspace: Optional[str] = None, headers: dict = None) -> Generator:
"""
Lists all pipelines available on deepset Cloud.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
Returns:
Generator of dictionaries: List[dict]
each dictionary: {
"name": str -> `pipeline_config_name` to be used in `load_from_deepset_cloud()`,
"..." -> additional pipeline meta information
}
example:
[{'name': 'my_super_nice_pipeline_config',
'pipeline_id': '2184e0c1-c6ec-40a1-9b28-5d2768e5efa2',
'status': 'DEPLOYED',
'created_at': '2022-02-01T09:57:03.803991+00:00',
'deleted': False,
'is_default': False,
'indexing': {'status': 'IN_PROGRESS',
'pending_file_count': 3,
'total_file_count': 31}}]
"""
workspace_url = self._build_workspace_url(workspace)
pipelines_url = f"{workspace_url}/pipelines"
generator = self.client.get_with_auto_paging(url=pipelines_url, headers=headers)
@ -468,6 +527,14 @@ class PipelineClient:
workspace: Optional[str] = None,
headers: dict = None,
):
"""
Saves a pipeline config to deepset Cloud.
:param config: The pipeline config to save.
:param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
"""
config["name"] = pipeline_config_name
workspace_url = self._build_workspace_url(workspace=workspace)
pipelines_url = f"{workspace_url}/pipelines"
@ -482,6 +549,14 @@ class PipelineClient:
workspace: Optional[str] = None,
headers: dict = None,
):
"""
Updates a pipeline config on deepset Cloud.
:param config: The pipeline config to save.
:param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
"""
config["name"] = pipeline_config_name
pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name)
yaml_url = f"{pipeline_url}/yaml"
@ -504,9 +579,9 @@ class PipelineClient:
If timeout exceeds a TimeoutError will be raised.
If deployment fails a DeepsetCloudError will be raised.
:param pipeline_config_name: name of the config file inside the deepset Cloud workspace.
:param workspace: workspace in deepset Cloud
:param headers: Headers to pass to API call
:param pipeline_config_name: Name of the config file inside the deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call.
:param timeout: The time in seconds to wait until deployment completes.
If the timeout is exceeded an error will be raised.
:param show_curl_message: Whether to print an additional message after successful deployment showing how to query the pipeline using curl.
@ -574,8 +649,8 @@ class PipelineClient:
If timeout exceeds a TimeoutError will be raised.
If deployment fails a DeepsetCloudError will be raised.
:param pipeline_config_name: name of the config file inside the deepset Cloud workspace.
:param workspace: workspace in deepset Cloud
:param pipeline_config_name: Name of the config file inside the deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call
:param timeout: The time in seconds to wait until undeployment completes.
If the timeout is exceeded an error will be raised.
@ -615,9 +690,9 @@ class PipelineClient:
"""
Transitions the pipeline config state to desired target_state on deepset Cloud.
:param target_state: the target state of the Pipeline config.
:param pipeline_config_name: name of the config file inside the deepset Cloud workspace.
:param workspace: workspace in deepset Cloud
:param target_state: The target state of the Pipeline config.
:param pipeline_config_name: Name of the config file inside the deepset Cloud workspace.
:param workspace: Specifies the name of the workspace on deepset Cloud.
:param headers: Headers to pass to API call
:param timeout: The time in seconds to wait until undeployment completes.
If the timeout is exceeded an error will be raised.
@ -705,7 +780,7 @@ class EvaluationSetClient:
A client to communicate with deepset Cloud evaluation sets and labels.
:param client: deepset Cloud client
:param workspace: workspace in deepset Cloud
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param evaluation_set: name of the evaluation set to fall back to
"""
@ -719,20 +794,17 @@ class EvaluationSetClient:
If no labels were found, raises DeepsetCloudError.
:param evaluation_set: name of the evaluation set for which labels should be fetched
:param workspace: Optional workspace in deepset Cloud
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
:return: list of Label
"""
try:
evaluation_sets_response = next(
self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
)
except StopIteration:
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
labels = self._get_labels_from_evaluation_set(
workspace=workspace, evaluation_set_id=evaluation_sets_response["evaluation_set_id"]
workspace=workspace, evaluation_set_id=evaluation_set_response["evaluation_set_id"]
)
return [
@ -760,43 +832,59 @@ class EvaluationSetClient:
:param evaluation_set: Optional evaluation set in deepset Cloud
If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
:param workspace: Optional workspace in deepset Cloud
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
:return: Number of labels for the given (or defaulting) index
"""
try:
evaluation_sets_response = next(
self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
)
except StopIteration:
if not evaluation_set:
evaluation_set = self.evaluation_set
evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
if evaluation_set_response is None:
raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
return evaluation_sets_response["total_labels"]
return evaluation_set_response["total_labels"]
def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]:
"""
Searches for all evaluation set names in the given workspace in deepset Cloud.
:param workspace: Optional workspace in deepset Cloud
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
:return: List of dictionaries that represent deepset Cloud evaluation sets.
These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
"""
evaluation_sets_response = self._get_evaluation_set(evaluation_set=None, workspace=workspace)
evaluation_sets_response = self._get_evaluation_sets(workspace=workspace)
return [eval_set for eval_set in evaluation_sets_response]
def _get_evaluation_set(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> Generator:
if not evaluation_set:
evaluation_set = self.evaluation_set
def _get_evaluation_sets(self, workspace: Optional[str] = None) -> Generator:
url = self._build_workspace_url(workspace=workspace)
evaluation_set_url = f"{url}/evaluation_sets"
return self.client.get_with_auto_paging(url=evaluation_set_url)
def _get_evaluation_set(
self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None
) -> Optional[Dict[str, Any]]:
url = self._build_workspace_url(workspace=workspace)
evaluation_set_url = f"{url}/evaluation_sets"
for response in self.client.get_with_auto_paging(url=evaluation_set_url, query_params={"name": evaluation_set}):
yield response
# evaluation_sets resource uses ids instead of names,
# so we have to query by name (which works as a contains filter) and take the first entry with matching name
query_params = {}
if evaluation_set is not None:
query_params["name"] = evaluation_set
matches = [
entry
for entry in self.client.get_with_auto_paging(url=evaluation_set_url, query_params=query_params)
if entry["name"] == evaluation_set
]
if any(matches):
return matches[0]
return None
def _get_labels_from_evaluation_set(
self, workspace: Optional[str] = None, evaluation_set_id: Optional[str] = None
@ -819,8 +907,7 @@ class FileClient:
A client to manage files on deepset Cloud.
:param client: deepset Cloud client
:param workspace: workspace in deepset Cloud
:param workspace: Specifies the name of the workspace for which you want to create the client.
"""
self.client = client
self.workspace = workspace
@ -832,6 +919,15 @@ class FileClient:
workspace: Optional[str] = None,
headers: dict = None,
):
"""
Uploads files to the deepset Cloud workspace.
:param file_paths: File paths to upload (for example .txt or .pdf files)
:param metas: Metadata of the files to upload
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the FileClient's default workspace is used.
:param headers: Headers to pass to API call
"""
workspace_url = self._build_workspace_url(workspace)
files_url = f"{workspace_url}/files"
if metas is None:
@ -856,6 +952,14 @@ class FileClient:
logger.info(f"Successfully uploaded {len(file_ids)} files.")
def delete_file(self, file_id: str, workspace: Optional[str] = None, headers: dict = None):
"""
Delete a file from the deepset Cloud workspace.
:param file_id: The id of the file to be deleted. Use `list_files` to retrieve the id of a file.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the FileClient's default workspace is used.
:param headers: Headers to pass to API call
"""
workspace_url = self._build_workspace_url(workspace)
file_url = f"{workspace_url}/files/{file_id}"
self.client.delete(url=file_url, headers=headers)
@ -868,6 +972,17 @@ class FileClient:
workspace: Optional[str] = None,
headers: dict = None,
) -> Generator:
"""
List all files in the given deepset Cloud workspace.
You can filter by name or by meta values.
:param name: The name or part of the name of the file.
:param meta_key: The key of the metadata of the file to be filtered for.
:param meta_value: The value of the metadata of the file to be filtered for.
:param workspace: Specifies the name of the workspace on deepset Cloud.
If None, the FileClient's default workspace is used.
:param headers: Headers to pass to API call
"""
workspace_url = self._build_workspace_url(workspace)
files_url = f"{workspace_url}/files"
query_params = {"name": name, "meta_key": meta_key, "meta_value": meta_value}
@ -880,6 +995,168 @@ class FileClient:
return self.client.build_workspace_url(workspace)
class EvaluationRunClient:
    # Maps the public eval_mode names to the integer values sent in request payloads.
    _EVAL_MODE_API_VALUES = {"integrated": 0, "isolated": 1}

    def __init__(self, client: DeepsetCloudClient, workspace: Optional[str] = None):
        """
        A client to manage deepset Cloud evaluation runs.

        :param client: deepset Cloud client
        :param workspace: Specifies the name of the workspace for which you want to create the client.
        """
        self.client = client
        self.workspace = workspace

    def create_eval_run(
        self,
        eval_run_name: str,
        workspace: Optional[str] = None,
        pipeline_config_name: Optional[str] = None,
        headers: dict = None,
        evaluation_set: Optional[str] = None,
        eval_mode: Literal["integrated", "isolated"] = "integrated",
        debug: bool = False,
        comment: Optional[str] = None,
        tags: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Creates an evaluation run.

        :param eval_run_name: The name of the evaluation run.
        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param pipeline_config_name: The name of the pipeline to evaluate.
        :param evaluation_set: The name of the evaluation set to use.
        :param eval_mode: The evaluation mode to use. Must be "integrated" or "isolated".
        :param debug: Whether to enable debug output.
        :param comment: Comment to add to the evaluation run.
        :param tags: Tags to add to the evaluation run.
        :param headers: Headers to pass to API call
        :return: The "data" entry of the JSON response.
        :raises ValueError: If `eval_mode` is neither "integrated" nor "isolated".
        """
        # Validate before building the URL so that no request is sent for a bad eval_mode.
        eval_mode_param = self._eval_mode_to_api_value(eval_mode)
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs"
        response = self.client.post(
            eval_run_url,
            json={
                "pipeline_name": pipeline_config_name,
                "evaluation_set_name": evaluation_set,
                "debug": debug,
                "eval_mode": eval_mode_param,
                "comment": comment,
                "name": eval_run_name,
                "tags": tags,
            },
            headers=headers,
        )
        return response.json()["data"]

    def get_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None) -> Dict[str, Any]:
        """
        Gets the evaluation run and shows its parameters and metrics.

        :param eval_run_name: The name of the evaluation run.
        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param headers: Headers to pass to API call
        :return: The complete JSON body of the response.
        """
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}"
        response = self.client.get(eval_run_url, headers=headers)
        return response.json()

    def get_eval_runs(self, workspace: Optional[str] = None, headers: dict = None) -> List[Dict[str, Any]]:
        """
        Gets all evaluation runs and shows their parameters and metrics.

        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param headers: Headers to pass to API call
        :return: All entries yielded by the auto-paging request, collected into a list.
        """
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs"
        response = self.client.get_with_auto_paging(eval_run_url, headers=headers)
        return list(response)

    def delete_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None):
        """
        Deletes an evaluation run.

        :param eval_run_name: The name of the evaluation run.
        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param headers: Headers to pass to API call
        """
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}"
        response = self.client.delete(eval_run_url, headers=headers)
        # Success is only logged for a 204 response; other status codes are not handled here.
        if response.status_code == 204:
            logger.info(f"Evaluation run '{eval_run_name}' deleted.")

    def start_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None):
        """
        Starts an evaluation run.

        :param eval_run_name: The name of the evaluation run.
        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param headers: Headers to pass to API call
        """
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}/start"
        response = self.client.post(eval_run_url, headers=headers)
        # Success is only logged for a 204 response; other status codes are not handled here.
        if response.status_code == 204:
            logger.info(f"Evaluation run '{eval_run_name}' has been started.")

    def update_eval_run(
        self,
        eval_run_name: str,
        workspace: Optional[str] = None,
        pipeline_config_name: Optional[str] = None,
        headers: dict = None,
        evaluation_set: Optional[str] = None,
        eval_mode: Literal["integrated", "isolated", None] = None,
        debug: Optional[bool] = None,
        comment: Optional[str] = None,
        tags: Optional[List[str]] = None,
    ) -> Dict[str, Any]:
        """
        Updates an evaluation run.

        Arguments left at None are still included in the request payload as None.

        :param eval_run_name: The name of the evaluation run to update.
        :param workspace: Specifies the name of the workspace on deepset Cloud.
                          If None, the EvaluationRunClient's default workspace is used.
        :param pipeline_config_name: The name of the pipeline to evaluate.
        :param evaluation_set: The name of the evaluation set to use.
        :param eval_mode: The evaluation mode to use. Must be "integrated", "isolated" or None.
        :param debug: Whether to enable debug output.
        :param comment: Comment to add to the evaluation run.
        :param tags: Tags to add to the evaluation run.
        :param headers: Headers to pass to API call
        :return: The "data" entry of the JSON response.
        :raises ValueError: If `eval_mode` is given but is neither "integrated" nor "isolated".
        """
        # None means "no eval_mode given" and is passed on as None instead of being mapped.
        eval_mode_param = None
        if eval_mode is not None:
            eval_mode_param = self._eval_mode_to_api_value(eval_mode)
        workspace_url = self._build_workspace_url(workspace)
        eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}"
        response = self.client.patch(
            eval_run_url,
            json={
                "pipeline_name": pipeline_config_name,
                "evaluation_set_name": evaluation_set,
                "debug": debug,
                "eval_mode": eval_mode_param,
                "comment": comment,
                "tags": tags,
            },
            headers=headers,
        )
        return response.json()["data"]

    @classmethod
    def _eval_mode_to_api_value(cls, eval_mode: str) -> int:
        """
        Translates an eval_mode name into the integer used in request payloads.

        :param eval_mode: Either "integrated" or "isolated".
        :return: 0 for "integrated", 1 for "isolated".
        :raises ValueError: If `eval_mode` is any other value.
        """
        try:
            return cls._EVAL_MODE_API_VALUES[eval_mode]
        except KeyError:
            # Reject unknown values instead of silently treating them as "isolated".
            supported = ", ".join(f"'{mode}'" for mode in cls._EVAL_MODE_API_VALUES)
            raise ValueError(f"Unsupported eval_mode {eval_mode!r}. Supported values: {supported}.") from None

    def _build_workspace_url(self, workspace: Optional[str] = None):
        """
        Builds the workspace URL, falling back to the client's default workspace if `workspace` is None.
        """
        if workspace is None:
            workspace = self.workspace
        return self.client.build_workspace_url(workspace)
class DeepsetCloud:
"""
A facade to communicate with deepset Cloud.
@ -897,10 +1174,11 @@ class DeepsetCloud:
Creates a client to communicate with deepset Cloud indexes.
:param api_key: Secret value of the API key.
If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
:param api_endpoint: The URL of the deepset Cloud API.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
:param workspace: workspace in deepset Cloud
If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param index: index in deepset Cloud workspace
"""
@ -919,10 +1197,11 @@ class DeepsetCloud:
Creates a client to communicate with deepset Cloud pipelines.
:param api_key: Secret value of the API key.
If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
:param api_endpoint: The URL of the deepset Cloud API.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
:param workspace: workspace in deepset Cloud
If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param pipeline_config_name: name of the pipeline_config in deepset Cloud workspace
"""
@ -941,16 +1220,35 @@ class DeepsetCloud:
Creates a client to communicate with deepset Cloud labels.
:param api_key: Secret value of the API key.
If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
:param api_endpoint: The URL of the deepset Cloud API.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
:param workspace: workspace in deepset Cloud
If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
:param workspace: Specifies the name of the workspace for which you want to create the client.
:param evaluation_set: name of the evaluation set in deepset Cloud
"""
client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
return EvaluationSetClient(client=client, workspace=workspace, evaluation_set=evaluation_set)
@classmethod
def get_eval_run_client(
    cls, api_key: Optional[str] = None, api_endpoint: Optional[str] = None, workspace: str = "default"
) -> EvaluationRunClient:
    """
    Builds an EvaluationRunClient for managing evaluation runs on deepset Cloud.

    A new DeepsetCloudClient is created from `api_key` and `api_endpoint` and wrapped
    together with `workspace`.

    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :param workspace: Specifies the name of the workspace for which you want to create the client.
    :return: An EvaluationRunClient bound to the given workspace.
    """
    return EvaluationRunClient(
        client=DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint),
        workspace=workspace,
    )
@classmethod
def get_file_client(
cls, api_key: Optional[str] = None, api_endpoint: Optional[str] = None, workspace: str = "default"
@ -959,11 +1257,341 @@ class DeepsetCloud:
Creates a client to manage files on deepset Cloud.
:param api_key: Secret value of the API key.
If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
:param api_endpoint: The URL of the deepset Cloud API.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
:param workspace: workspace in deepset Cloud
If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
:param workspace: Specifies the name of the workspace for which you want to create the client.
"""
client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
return FileClient(client=client, workspace=workspace)
class DeepsetCloudExperiments:
"""
A facade to conduct and manage experiments within deepset Cloud.
To start a new experiment run:
1. Choose a pipeline to evaluate using `list_pipelines()`.
2. Choose an evaluation set using `list_evaluation_sets()`.
3. Create and start a new run using `create_and_start_run()`.
4. Track the run using `get_run()`.
"""
@classmethod
def list_pipelines(
    cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None
) -> List[dict]:
    """
    Lists all pipelines available on deepset Cloud.

    :param workspace: Specifies the name of the workspace on deepset Cloud.
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.

    Returns:
        list of dictionaries: List[dict]
        each dictionary: {
                    "name": str -> `pipeline_config_name` to be used in `load_from_deepset_cloud()`,
                    "..." -> additional pipeline meta information
                    }
        example:
                [{'name': 'my_super_nice_pipeline_config',
                    'pipeline_id': '2184e0c1-c6ec-40a1-9b28-5d2768e5efa2',
                    'status': 'DEPLOYED',
                    'created_at': '2022-02-01T09:57:03.803991+00:00',
                    'deleted': False,
                    'is_default': False,
                    'indexing': {'status': 'IN_PROGRESS',
                    'pending_file_count': 3,
                    'total_file_count': 31}}]
    """
    pipeline_client = DeepsetCloud.get_pipeline_client(
        api_key=api_key, api_endpoint=api_endpoint, workspace=workspace
    )
    # list_pipeline_configs() returns a generator; materialize it into a list.
    return list(pipeline_client.list_pipeline_configs())
@classmethod
def list_evaluation_sets(
    cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None
) -> List[dict]:
    """
    Lists all evaluation sets available on deepset Cloud.

    :param workspace: Specifies the name of the workspace on deepset Cloud.
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.

    Returns:
        list of dictionaries: List[dict]
        each dictionary: {
                    "name": str -> `evaluation_set` to be used in `create_run()`,
                    "..." -> additional evaluation set meta information
                    }
        example:
                [{'evaluation_set_id': 'fb084729-57ad-4b57-9f78-ec0eb4d29c9f',
                    'name': 'my-question-answering-evaluation-set',
                    'created_at': '2022-05-06T09:54:14.830529+00:00',
                    'matched_labels': 234,
                    'total_labels': 234}]
    """
    evaluation_set_client = DeepsetCloud.get_evaluation_set_client(
        api_key=api_key, api_endpoint=api_endpoint, workspace=workspace
    )
    evaluation_sets = evaluation_set_client.get_evaluation_sets()
    return evaluation_sets
@classmethod
def get_runs(
    cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None
) -> List[dict]:
    """
    Gets all evaluation runs.

    :param workspace: Specifies the name of the workspace on deepset Cloud.
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.

    Returns:
        list of dictionaries: List[dict]
        example:
                [{'eval_run_name': 'my-eval-run-1',
                    'parameters': {
                        'pipeline_name': 'my-pipeline-1_696bc5d0-ee65-46c1-a308-059507bc353b',
                        'evaluation_set_name': 'my-eval-set-name',
                        'debug': False,
                        'eval_mode': 0
                    },
                    'metrics': {
                        'isolated_exact_match': 0.45,
                        'isolated_f1': 0.89,
                        'isolated_sas': 0.91,
                        'integrated_exact_match': 0.39,
                        'integrated_f1': 0.76,
                        'integrated_sas': 0.78,
                        'mean_reciprocal_rank': 0.77,
                        'mean_average_precision': 0.78,
                        'recall_single_hit': 0.91,
                        'recall_multi_hit': 0.91,
                        'normal_discounted_cummulative_gain': 0.83,
                        'precision': 0.52
                    },
                    'logs': {},
                    'status': 1,
                    'eval_mode': 0,
                    'eval_run_labels': [],
                    'created_at': '2022-05-24T12:13:16.445857+00:00',
                    'comment': 'This is a comment about this eval run',
                    'tags': ['experiment-1', 'experiment-2', 'experiment-3']
                }]
    """
    eval_run_client = DeepsetCloud.get_eval_run_client(
        api_key=api_key, api_endpoint=api_endpoint, workspace=workspace
    )
    eval_runs = eval_run_client.get_eval_runs()
    return eval_runs
@classmethod
def create_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
    pipeline_config_name: Optional[str] = None,
    evaluation_set: Optional[str] = None,
    eval_mode: Literal["integrated", "isolated"] = "integrated",
    debug: bool = False,
    comment: Optional[str] = None,
    tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Creates an evaluation run.

    An EvaluationRunClient is built for `workspace` and the run settings are
    forwarded to its `create_eval_run()` method.

    :param eval_run_name: The name of the evaluation run.
    :param workspace: Specifies the name of the workspace on deepset Cloud.
    :param pipeline_config_name: The name of the pipeline to evaluate. Use `list_pipelines()` to list all available pipelines.
    :param evaluation_set: The name of the evaluation set to use. Use `list_evaluation_sets()` to list all available evaluation sets.
    :param eval_mode: The evaluation mode to use.
    :param debug: Whether to enable debug output.
    :param comment: Comment to add to the evaluation run.
    :param tags: Tags to add to the evaluation run.
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :return: The value returned by `EvaluationRunClient.create_eval_run()`.
    """
    eval_run_client = DeepsetCloud.get_eval_run_client(
        api_key=api_key, api_endpoint=api_endpoint, workspace=workspace
    )
    run_settings = dict(
        eval_run_name=eval_run_name,
        pipeline_config_name=pipeline_config_name,
        evaluation_set=evaluation_set,
        eval_mode=eval_mode,
        debug=debug,
        comment=comment,
        tags=tags,
    )
    return eval_run_client.create_eval_run(**run_settings)
@classmethod
def update_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
    pipeline_config_name: Optional[str] = None,
    evaluation_set: Optional[str] = None,
    eval_mode: Literal["integrated", "isolated"] = "integrated",
    debug: bool = False,
    comment: Optional[str] = None,
    tags: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Updates an evaluation run.

    :param eval_run_name: The name of the evaluation run to update.
    :param workspace: The name of the workspace on deepset Cloud. It's handed to the evaluation run client.
                      Defaults to "default".
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :param pipeline_config_name: The name of the pipeline to evaluate.
                                 Use `list_pipelines()` to list all available pipelines.
    :param evaluation_set: The name of the evaluation set to use.
                           Use `list_evaluation_sets()` to list all available evaluation sets.
    :param eval_mode: The evaluation mode to use, either "integrated" or "isolated".
    :param debug: Whether to enable debug output.
    :param comment: Comment to add to the evaluation run.
    :param tags: Tags to add to the evaluation run.
    :return: The result of the evaluation run client's `update_eval_run()` call.
    """
    eval_run_client = DeepsetCloud.get_eval_run_client(
        workspace=workspace, api_key=api_key, api_endpoint=api_endpoint
    )
    # NOTE(review): `eval_mode` and `debug` are always forwarded with their defaults ("integrated", False),
    # even when the caller only wants to change other fields. Confirm this doesn't reset them on the
    # existing run.
    updated_run = eval_run_client.update_eval_run(
        eval_run_name=eval_run_name,
        pipeline_config_name=pipeline_config_name,
        evaluation_set=evaluation_set,
        eval_mode=eval_mode,
        debug=debug,
        comment=comment,
        tags=tags,
    )
    return updated_run
@classmethod
def get_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Gets the evaluation run and shows its parameters and metrics.

    :param eval_run_name: The name of the evaluation run.
    :param workspace: The name of the workspace on deepset Cloud. It's handed to the evaluation run client.
                      Defaults to "default".
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :return: The result of the evaluation run client's `get_eval_run()` call.
    """
    eval_run_client = DeepsetCloud.get_eval_run_client(
        workspace=workspace, api_key=api_key, api_endpoint=api_endpoint
    )
    eval_run = eval_run_client.get_eval_run(eval_run_name=eval_run_name)
    return eval_run
@classmethod
def delete_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
):
    """
    Deletes an evaluation run.

    :param eval_run_name: The name of the evaluation run.
    :param workspace: The name of the workspace on deepset Cloud. It's handed to the evaluation run client.
                      Defaults to "default".
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :return: Whatever the evaluation run client's `delete_eval_run()` call returns.
    """
    eval_run_client = DeepsetCloud.get_eval_run_client(
        workspace=workspace, api_key=api_key, api_endpoint=api_endpoint
    )
    deletion_result = eval_run_client.delete_eval_run(eval_run_name=eval_run_name)
    return deletion_result
@classmethod
def start_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
):
    """
    Starts an evaluation run.

    The method returns nothing. It only logs a hint that the run's progress can be checked
    through the `status` field returned from `get_run()`.

    :param eval_run_name: The name of the evaluation run.
    :param workspace: The name of the workspace on deepset Cloud. It's handed to the evaluation run client.
                      Defaults to "default".
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    """
    client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace)
    client.start_eval_run(eval_run_name=eval_run_name)
    # The result of the start call is not returned, so point the user to where progress can be observed.
    logger.info("You can check run progress by inspecting the `status` field returned from `get_run()`.")
@classmethod
def create_and_start_run(
    cls,
    eval_run_name: str,
    workspace: str = "default",
    api_key: Optional[str] = None,
    api_endpoint: Optional[str] = None,
    pipeline_config_name: Optional[str] = None,
    evaluation_set: Optional[str] = None,
    eval_mode: Literal["integrated", "isolated"] = "integrated",
    debug: bool = False,
    comment: Optional[str] = None,
    tags: Optional[List[str]] = None,
):
    """
    Creates and starts an evaluation run.

    This is a convenience wrapper that calls `create_run()` and then `start_run()` with the same
    run name, workspace, and credentials. It returns nothing.

    :param eval_run_name: The name of the evaluation run.
    :param workspace: The name of the workspace on deepset Cloud. Defaults to "default".
    :param api_key: Secret value of the API key.
                    If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
    :param api_endpoint: The URL of the deepset Cloud API.
                         If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
                         If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
    :param pipeline_config_name: The name of the pipeline to evaluate.
                                 Use `list_pipelines()` to list all available pipelines.
    :param evaluation_set: The name of the evaluation set to use.
                           Use `list_evaluation_sets()` to list all available evaluation sets.
    :param eval_mode: The evaluation mode to use, either "integrated" or "isolated".
    :param debug: Whether to enable debug output.
    :param comment: Comment to add to the evaluation run.
    :param tags: Tags to add to the evaluation run.
    """
    # Step 1: register the run with all of its settings.
    cls.create_run(
        eval_run_name=eval_run_name,
        pipeline_config_name=pipeline_config_name,
        evaluation_set=evaluation_set,
        eval_mode=eval_mode,
        debug=debug,
        comment=comment,
        tags=tags,
        workspace=workspace,
        api_key=api_key,
        api_endpoint=api_endpoint,
    )
    # Step 2: start the run that was just created, addressing it by name.
    cls.start_run(
        eval_run_name=eval_run_name,
        workspace=workspace,
        api_key=api_key,
        api_endpoint=api_endpoint,
    )

View File

@ -5,6 +5,7 @@ import pandas as pd
from pathlib import Path
import responses
from responses import matchers
from haystack.utils.deepsetcloud import DeepsetCloud
from haystack.utils.preprocessing import convert_files_to_docs, tika_convert_files_to_docs
@ -391,3 +392,505 @@ def test_list_files_on_deepset_cloud():
assert len(files) == 2
assert files[0]["name"] == "sample_pdf_1.pdf"
assert files[1]["name"] == "sample_pdf_2.pdf"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_create_eval_run():
    """
    Creates an eval run through the eval run client, then checks that it shows up with the expected
    name, tags, comment, and parameters both in the run listing and when fetched by name.

    When MOCK_DC is set, the HTTP endpoints are mocked with `responses`.
    """
    if MOCK_DC:
        eval_runs_url = f"{DC_API_ENDPOINT}/workspaces/default/eval_runs"

        # The payload the mocked API reports for the run. The listing and the single-run endpoint
        # return the very same data.
        mocked_run = {
            "created_at": "2022-05-24T12:13:16.445857+00:00",
            "eval_mode": 0,
            "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c",
            "name": "my-eval-run-1",
            "comment": "this is my first run",
            "tags": ["my-experiment-1"],
            "eval_run_labels": [],
            "logs": {},
            "metrics": {
                "integrated_exact_match": None,
                "integrated_f1": None,
                "integrated_sas": None,
                "isolated_exact_match": None,
                "isolated_f1": None,
                "isolated_sas": None,
                "mean_average_precision": None,
                "mean_reciprocal_rank": None,
                "normal_discounted_cummulative_gain": None,
                "precision": None,
                "recall_multi_hit": None,
                "recall_single_hit": None,
            },
            "parameters": {
                "debug": False,
                "eval_mode": 0,
                "evaluation_set_name": "my-eval-set-1",
                "pipeline_name": "my-pipeline-1",
            },
            "status": 1,
        }

        # Creation: the mock only matches if the client sends exactly this request body.
        expected_create_body = {
            "name": "my-eval-run-1",
            "pipeline_name": "my-pipeline-1",
            "evaluation_set_name": "my-eval-set-1",
            "eval_mode": 0,
            "comment": "this is my first run",
            "debug": False,
            "tags": ["my-experiment-1"],
        }
        responses.add(
            method=responses.POST,
            url=eval_runs_url,
            json={"data": {"eval_run_name": "my-eval-run-1"}},
            status=200,
            match=[matchers.json_params_matcher(expected_create_body)],
        )

        # Listing all runs.
        responses.add(
            method=responses.GET,
            url=eval_runs_url,
            json={"data": [mocked_run], "has_more": False, "total": 1},
            status=200,
        )

        # Fetching the single run by name.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1",
            json=mocked_run,
            status=200,
        )

    client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    client.create_eval_run(
        eval_run_name="my-eval-run-1",
        pipeline_config_name="my-pipeline-1",
        evaluation_set="my-eval-set-1",
        eval_mode="integrated",
        comment="this is my first run",
        tags=["my-experiment-1"],
    )

    runs = client.get_eval_runs()
    assert len(runs) == 1
    listed_run = runs[0]
    assert listed_run["name"] == "my-eval-run-1"
    assert listed_run["tags"] == ["my-experiment-1"]
    assert listed_run["comment"] == "this is my first run"
    assert listed_run["parameters"]["pipeline_name"] == "my-pipeline-1"
    assert listed_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"

    fetched_run = client.get_eval_run("my-eval-run-1")
    assert fetched_run["name"] == "my-eval-run-1"
    assert fetched_run["tags"] == ["my-experiment-1"]
    assert fetched_run["comment"] == "this is my first run"
    assert fetched_run["parameters"]["pipeline_name"] == "my-pipeline-1"
    assert fetched_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_update_eval_run():
    """
    Creates an eval run, updates its pipeline and comment through the eval run client, and checks
    the run's fields before and after the update.

    When MOCK_DC is set, the HTTP endpoints are mocked with `responses`.
    """
    if MOCK_DC:
        single_run_url = f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1"

        # State of the run as reported right after creation.
        run_before_update = {
            "created_at": "2022-05-24T12:13:16.445857+00:00",
            "eval_mode": 0,
            "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c",
            "name": "my-eval-run-1",
            "comment": "this is my first run",
            "tags": ["my-experiment-1"],
            "eval_run_labels": [],
            "logs": {},
            "metrics": {
                "integrated_exact_match": None,
                "integrated_f1": None,
                "integrated_sas": None,
                "isolated_exact_match": None,
                "isolated_f1": None,
                "isolated_sas": None,
                "mean_average_precision": None,
                "mean_reciprocal_rank": None,
                "normal_discounted_cummulative_gain": None,
                "precision": None,
                "recall_multi_hit": None,
                "recall_single_hit": None,
            },
            "parameters": {
                "debug": False,
                "eval_mode": 0,
                "evaluation_set_name": "my-eval-set-1",
                "pipeline_name": "my-pipeline-1",
            },
            "status": "CREATED",
        }
        # State of the run after the update: only the comment and the pipeline name differ.
        run_after_update = {
            **run_before_update,
            "comment": "this is my first run with second pipeline",
            "parameters": {**run_before_update["parameters"], "pipeline_name": "my-pipeline-2"},
        }

        # Creation: the mock only matches if the client sends exactly this request body.
        expected_create_body = {
            "name": "my-eval-run-1",
            "pipeline_name": "my-pipeline-1",
            "evaluation_set_name": "my-eval-set-1",
            "eval_mode": 0,
            "comment": "this is my first run",
            "debug": False,
            "tags": ["my-experiment-1"],
        }
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs",
            json={"data": {"eval_run_name": "my-eval-run-1"}},
            status=200,
            match=[matchers.json_params_matcher(expected_create_body)],
        )

        # Two GET mocks share the same URL; keep them registered in this order (pre-update first).
        responses.add(method=responses.GET, url=single_run_url, json=run_before_update, status=200)

        # Update: the mock only matches a PATCH body carrying just the two changed fields.
        expected_update_body = {
            "pipeline_name": "my-pipeline-2",
            "comment": "this is my first run with second pipeline",
        }
        responses.add(
            method=responses.PATCH,
            url=single_run_url,
            json={"data": {"eval_run_name": "my-eval-run-1"}},
            status=200,
            match=[matchers.json_params_matcher(expected_update_body)],
        )

        responses.add(method=responses.GET, url=single_run_url, json=run_after_update, status=200)

    client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    client.create_eval_run(
        eval_run_name="my-eval-run-1",
        pipeline_config_name="my-pipeline-1",
        evaluation_set="my-eval-set-1",
        eval_mode="integrated",
        comment="this is my first run",
        tags=["my-experiment-1"],
    )

    created_run = client.get_eval_run("my-eval-run-1")
    assert created_run["name"] == "my-eval-run-1"
    assert created_run["tags"] == ["my-experiment-1"]
    assert created_run["comment"] == "this is my first run"
    assert created_run["parameters"]["pipeline_name"] == "my-pipeline-1"
    assert created_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"

    client.update_eval_run(
        eval_run_name="my-eval-run-1",
        pipeline_config_name="my-pipeline-2",
        comment="this is my first run with second pipeline",
    )

    updated_run = client.get_eval_run("my-eval-run-1")
    assert updated_run["name"] == "my-eval-run-1"
    assert updated_run["tags"] == ["my-experiment-1"]
    assert updated_run["comment"] == "this is my first run with second pipeline"
    assert updated_run["parameters"]["pipeline_name"] == "my-pipeline-2"
    assert updated_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_start_eval_run():
    """
    Creates an eval run, starts it through the eval run client, and checks that the reported
    status goes from "CREATED" to "STARTED" while the other fields stay the same.

    When MOCK_DC is set, the HTTP endpoints are mocked with `responses`.
    """
    if MOCK_DC:
        single_run_url = f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1"

        # State of the run as reported right after creation.
        created_payload = {
            "created_at": "2022-05-24T12:13:16.445857+00:00",
            "eval_mode": 0,
            "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c",
            "name": "my-eval-run-1",
            "comment": "this is my first run",
            "tags": ["my-experiment-1"],
            "eval_run_labels": [],
            "logs": {},
            "metrics": {
                "integrated_exact_match": None,
                "integrated_f1": None,
                "integrated_sas": None,
                "isolated_exact_match": None,
                "isolated_f1": None,
                "isolated_sas": None,
                "mean_average_precision": None,
                "mean_reciprocal_rank": None,
                "normal_discounted_cummulative_gain": None,
                "precision": None,
                "recall_multi_hit": None,
                "recall_single_hit": None,
            },
            "parameters": {
                "debug": False,
                "eval_mode": 0,
                "evaluation_set_name": "my-eval-set-1",
                "pipeline_name": "my-pipeline-1",
            },
            "status": "CREATED",
        }
        # After starting, only the status differs.
        started_payload = {**created_payload, "status": "STARTED"}

        # Creation: the mock only matches if the client sends exactly this request body.
        expected_create_body = {
            "name": "my-eval-run-1",
            "pipeline_name": "my-pipeline-1",
            "evaluation_set_name": "my-eval-set-1",
            "eval_mode": 0,
            "comment": "this is my first run",
            "debug": False,
            "tags": ["my-experiment-1"],
        }
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs",
            json={"data": {"eval_run_name": "my-eval-run-1"}},
            status=200,
            match=[matchers.json_params_matcher(expected_create_body)],
        )

        # Two GET mocks share the same URL; keep them registered in this order (created first).
        responses.add(method=responses.GET, url=single_run_url, json=created_payload, status=200)

        # Starting the run.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1/start",
            json={},
            status=200,
        )

        responses.add(method=responses.GET, url=single_run_url, json=started_payload, status=200)

    client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    client.create_eval_run(
        eval_run_name="my-eval-run-1",
        pipeline_config_name="my-pipeline-1",
        evaluation_set="my-eval-set-1",
        eval_mode="integrated",
        comment="this is my first run",
        tags=["my-experiment-1"],
    )

    created_run = client.get_eval_run("my-eval-run-1")
    assert created_run["name"] == "my-eval-run-1"
    assert created_run["tags"] == ["my-experiment-1"]
    assert created_run["comment"] == "this is my first run"
    assert created_run["parameters"]["pipeline_name"] == "my-pipeline-1"
    assert created_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"
    assert created_run["status"] == "CREATED"

    client.start_eval_run(eval_run_name="my-eval-run-1")

    started_run = client.get_eval_run("my-eval-run-1")
    assert started_run["name"] == "my-eval-run-1"
    assert started_run["tags"] == ["my-experiment-1"]
    assert started_run["comment"] == "this is my first run"
    assert started_run["parameters"]["pipeline_name"] == "my-pipeline-1"
    assert started_run["parameters"]["evaluation_set_name"] == "my-eval-set-1"
    assert started_run["status"] == "STARTED"
@pytest.mark.usefixtures(deepset_cloud_fixture.__name__)
@responses.activate
def test_delete_eval_run():
    """
    Creates an eval run, deletes it through the eval run client, and checks that the run listing
    goes from one entry to none.

    When MOCK_DC is set, the HTTP endpoints are mocked with `responses`.
    """
    if MOCK_DC:
        # Creation: the mock only matches if the client sends exactly this request body.
        responses.add(
            method=responses.POST,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs",
            json={"data": {"eval_run_name": "my-eval-run-1"}},
            status=200,
            match=[
                matchers.json_params_matcher(
                    {
                        "name": "my-eval-run-1",
                        "pipeline_name": "my-pipeline-1",
                        "evaluation_set_name": "my-eval-set-1",
                        "eval_mode": 0,
                        "comment": "this is my first run",
                        "debug": False,
                        "tags": ["my-experiment-1"],
                    }
                )
            ],
        )
        # Two listing mocks share the same URL; keep them registered in this order
        # (listing with the run first, empty listing second).
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs",
            json={
                "data": [
                    {
                        "created_at": "2022-05-24T12:13:16.445857+00:00",
                        "eval_mode": 0,
                        "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c",
                        "name": "my-eval-run-1",
                        "comment": "this is my first run",
                        "tags": ["my-experiment-1"],
                        "eval_run_labels": [],
                        "logs": {},
                        "metrics": {
                            "integrated_exact_match": None,
                            "integrated_f1": None,
                            "integrated_sas": None,
                            "isolated_exact_match": None,
                            "isolated_f1": None,
                            "isolated_sas": None,
                            "mean_average_precision": None,
                            "mean_reciprocal_rank": None,
                            "normal_discounted_cummulative_gain": None,
                            "precision": None,
                            "recall_multi_hit": None,
                            "recall_single_hit": None,
                        },
                        "parameters": {
                            "debug": False,
                            "eval_mode": 0,
                            "evaluation_set_name": "my-eval-set-1",
                            "pipeline_name": "my-pipeline-1",
                        },
                        "status": 1,
                    }
                ],
                "has_more": False,
                "total": 1,
            },
            status=200,
        )
        # Deletion answers with 204 and no body.
        responses.add(
            method=responses.DELETE, url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", status=204
        )
        # Listing after the deletion: no runs left.
        responses.add(
            method=responses.GET,
            url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs",
            json={"data": [], "has_more": False, "total": 0},
            status=200,
        )

    client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY)
    client.create_eval_run(
        eval_run_name="my-eval-run-1",
        pipeline_config_name="my-pipeline-1",
        evaluation_set="my-eval-set-1",
        eval_mode="integrated",
        comment="this is my first run",
        tags=["my-experiment-1"],
    )

    runs = client.get_eval_runs()
    assert len(runs) == 1

    # The return value of the deletion is not inspected; the test checks the listing instead.
    client.delete_eval_run("my-eval-run-1")

    runs = client.get_eval_runs()
    assert len(runs) == 0