From fc25adf959760c647b1a0dc3883fa1abb70734cf Mon Sep 17 00:00:00 2001 From: tstadel <60758086+tstadel@users.noreply.github.com> Date: Mon, 30 May 2022 18:18:19 +0200 Subject: [PATCH] Create eval runs on deepset Cloud (#2534) * add EvaluationRunClient * Update Documentation & Code Style * temporarily resolve names to ids * Update Documentation & Code Style * add delete and update methods * minor fixes * add experiments facade * dummy implement start_run() * start eval runs added * Update Documentation & Code Style * fix merge * switch to names on api level * add create eval_run test * Update Documentation & Code Style * further tests added * update docstrings * add docstrings * add missing tags param, fix docstrings * refactor _get_evaluation_sets * fix mypy Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- haystack/utils/__init__.py | 2 +- haystack/utils/deepsetcloud.py | 728 ++++++++++++++++++++++++++++++--- test/others/test_utils.py | 503 +++++++++++++++++++++++ 3 files changed, 1182 insertions(+), 51 deletions(-) diff --git a/haystack/utils/__init__.py b/haystack/utils/__init__.py index fb2555e0f..a7167815b 100644 --- a/haystack/utils/__init__.py +++ b/haystack/utils/__init__.py @@ -10,7 +10,7 @@ from haystack.utils.doc_store import ( stop_opensearch, stop_service, ) -from haystack.utils.deepsetcloud import DeepsetCloud, DeepsetCloudError +from haystack.utils.deepsetcloud import DeepsetCloud, DeepsetCloudError, DeepsetCloudExperiments from haystack.utils.export_utils import ( print_answers, print_documents, diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py index 5ff96e81a..5510d1045 100644 --- a/haystack/utils/deepsetcloud.py +++ b/haystack/utils/deepsetcloud.py @@ -78,9 +78,10 @@ class DeepsetCloudClient: A client to communicate with deepset Cloud. :param api_key: Secret value of the API key. - If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable. 
+ If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. :param api_endpoint: The URL of the deepset Cloud API. - If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. """ self.api_key = api_key or os.getenv("DEEPSET_CLOUD_API_KEY") if self.api_key is None: @@ -233,6 +234,27 @@ class DeepsetCloudClient: raise_on_error=raise_on_error, ) + def patch( + self, + url: str, + json: dict = None, + data: Any = None, + query_params: dict = None, + stream: bool = False, + headers: dict = None, + raise_on_error: bool = True, + ): + return self._execute_request( + method="PATCH", + url=url, + query_params=query_params, + json=json, + data=data, + stream=stream, + headers=headers, + raise_on_error=raise_on_error, + ) + def _execute_auto_paging_request( self, method: Literal["GET", "POST", "PUT", "HEAD", "DELETE"], @@ -268,7 +290,7 @@ class DeepsetCloudClient: def _execute_request( self, - method: Literal["GET", "POST", "PUT", "HEAD", "DELETE"], + method: Literal["GET", "POST", "PUT", "HEAD", "DELETE", "PATCH"], url: str, json: dict = None, data: Any = None, @@ -312,7 +334,7 @@ class IndexClient: A client to communicate with deepset Cloud indexes. :param client: deepset Cloud client - :param workspace: workspace in deepset Cloud + :param workspace: Specifies the name of the workspace for which you want to create the client. :param index: index in deepset Cloud workspace """ @@ -425,8 +447,8 @@ class PipelineClient: A client to communicate with deepset Cloud pipelines. :param client: deepset Cloud client - :param workspace: workspace in deepset Cloud - :param pipeline_config_name: name of the pipeline_config in deepset Cloud workspace + :param workspace: Specifies the name of the workspace for which you want to create the client. 
+ :param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace. """ self.client = client @@ -436,6 +458,13 @@ class PipelineClient: def get_pipeline_config( self, workspace: Optional[str] = None, pipeline_config_name: Optional[str] = None, headers: dict = None ) -> dict: + """ + Gets the config from a pipeline on deepset Cloud. + + :param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. + """ pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name) pipeline_config_url = f"{pipeline_url}/json" response = self.client.get(url=pipeline_config_url, headers=headers).json() @@ -444,6 +473,13 @@ class PipelineClient: def get_pipeline_config_info( self, workspace: Optional[str] = None, pipeline_config_name: Optional[str] = None, headers: dict = None ) -> Optional[dict]: + """ + Gets information about a pipeline on deepset Cloud. + + :param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. + """ pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name) response = self.client.get(url=pipeline_url, headers=headers, raise_on_error=False) if response.status_code == 200: @@ -456,6 +492,29 @@ class PipelineClient: ) def list_pipeline_configs(self, workspace: Optional[str] = None, headers: dict = None) -> Generator: + """ + Lists all pipelines available on deepset Cloud. + + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. + + Returns: + Generator of dictionaries: List[dict] + each dictionary: { + "name": str -> `pipeline_config_name` to be used in `load_from_deepset_cloud()`, + "..." 
-> additional pipeline meta information + } + example: + [{'name': 'my_super_nice_pipeline_config', + 'pipeline_id': '2184e0c1-c6ec-40a1-9b28-5d2768e5efa2', + 'status': 'DEPLOYED', + 'created_at': '2022-02-01T09:57:03.803991+00:00', + 'deleted': False, + 'is_default': False, + 'indexing': {'status': 'IN_PROGRESS', + 'pending_file_count': 3, + 'total_file_count': 31}}] + """ workspace_url = self._build_workspace_url(workspace) pipelines_url = f"{workspace_url}/pipelines" generator = self.client.get_with_auto_paging(url=pipelines_url, headers=headers) @@ -468,6 +527,14 @@ class PipelineClient: workspace: Optional[str] = None, headers: dict = None, ): + """ + Saves a pipeline config to deepset Cloud. + + :param config: The pipeline config to save. + :param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. + """ config["name"] = pipeline_config_name workspace_url = self._build_workspace_url(workspace=workspace) pipelines_url = f"{workspace_url}/pipelines" @@ -482,6 +549,14 @@ class PipelineClient: workspace: Optional[str] = None, headers: dict = None, ): + """ + Updates a pipeline config on deepset Cloud. + + :param config: The pipeline config to save. + :param pipeline_config_name: Name of the pipeline_config in deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. + """ config["name"] = pipeline_config_name pipeline_url = self._build_pipeline_url(workspace=workspace, pipeline_config_name=pipeline_config_name) yaml_url = f"{pipeline_url}/yaml" @@ -504,9 +579,9 @@ class PipelineClient: If timeout exceeds a TimeoutError will be raised. If deployment fails a DeepsetCloudError will be raised. - :param pipeline_config_name: name of the config file inside the deepset Cloud workspace. 
- :param workspace: workspace in deepset Cloud - :param headers: Headers to pass to API call + :param pipeline_config_name: Name of the config file inside the deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param headers: Headers to pass to API call. :param timeout: The time in seconds to wait until deployment completes. If the timeout is exceeded an error will be raised. :param show_curl_message: Whether to print an additional message after successful deployment showing how to query the pipeline using curl. @@ -574,8 +649,8 @@ class PipelineClient: If timeout exceeds a TimeoutError will be raised. If deployment fails a DeepsetCloudError will be raised. - :param pipeline_config_name: name of the config file inside the deepset Cloud workspace. - :param workspace: workspace in deepset Cloud + :param pipeline_config_name: Name of the config file inside the deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. :param headers: Headers to pass to API call :param timeout: The time in seconds to wait until undeployment completes. If the timeout is exceeded an error will be raised. @@ -615,9 +690,9 @@ class PipelineClient: """ Transitions the pipeline config state to desired target_state on deepset Cloud. - :param target_state: the target state of the Pipeline config. - :param pipeline_config_name: name of the config file inside the deepset Cloud workspace. - :param workspace: workspace in deepset Cloud + :param target_state: The target state of the Pipeline config. + :param pipeline_config_name: Name of the config file inside the deepset Cloud workspace. + :param workspace: Specifies the name of the workspace on deepset Cloud. :param headers: Headers to pass to API call :param timeout: The time in seconds to wait until undeployment completes. If the timeout is exceeded an error will be raised. 
@@ -705,7 +780,7 @@ class EvaluationSetClient: A client to communicate with deepset Cloud evaluation sets and labels. :param client: deepset Cloud client - :param workspace: workspace in deepset Cloud + :param workspace: Specifies the name of the workspace for which you want to create the client. :param evaluation_set: name of the evaluation set to fall back to """ @@ -719,20 +794,17 @@ class EvaluationSetClient: If no labels were found, raises DeepsetCloudError. :param evaluation_set: name of the evaluation set for which labels should be fetched - :param workspace: Optional workspace in deepset Cloud + :param workspace: Specifies the name of the workspace on deepset Cloud. If None, the EvaluationSetClient's default workspace (self.workspace) will be used. :return: list of Label """ - try: - evaluation_sets_response = next( - self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace) - ) - except StopIteration: + evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace) + if evaluation_set_response is None: raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}") labels = self._get_labels_from_evaluation_set( - workspace=workspace, evaluation_set_id=evaluation_sets_response["evaluation_set_id"] + workspace=workspace, evaluation_set_id=evaluation_set_response["evaluation_set_id"] ) return [ @@ -760,43 +832,59 @@ class EvaluationSetClient: :param evaluation_set: Optional evaluation set in deepset Cloud If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used. - :param workspace: Optional workspace in deepset Cloud + :param workspace: Specifies the name of the workspace on deepset Cloud. If None, the EvaluationSetClient's default workspace (self.workspace) will be used. 
:return: Number of labels for the given (or defaulting) index """ - try: - evaluation_sets_response = next( - self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace) - ) - except StopIteration: + if not evaluation_set: + evaluation_set = self.evaluation_set + + evaluation_set_response = self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace) + if evaluation_set_response is None: raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}") - return evaluation_sets_response["total_labels"] + return evaluation_set_response["total_labels"] def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]: """ Searches for all evaluation set names in the given workspace in deepset Cloud. - :param workspace: Optional workspace in deepset Cloud + :param workspace: Specifies the name of the workspace on deepset Cloud. If None, the EvaluationSetClient's default workspace (self.workspace) will be used. :return: List of dictionaries that represent deepset Cloud evaluation sets. These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields. 
""" - evaluation_sets_response = self._get_evaluation_set(evaluation_set=None, workspace=workspace) + evaluation_sets_response = self._get_evaluation_sets(workspace=workspace) return [eval_set for eval_set in evaluation_sets_response] - def _get_evaluation_set(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> Generator: - if not evaluation_set: - evaluation_set = self.evaluation_set + def _get_evaluation_sets(self, workspace: Optional[str] = None) -> Generator: + url = self._build_workspace_url(workspace=workspace) + evaluation_set_url = f"{url}/evaluation_sets" + return self.client.get_with_auto_paging(url=evaluation_set_url) + def _get_evaluation_set( + self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None + ) -> Optional[Dict[str, Any]]: url = self._build_workspace_url(workspace=workspace) evaluation_set_url = f"{url}/evaluation_sets" - for response in self.client.get_with_auto_paging(url=evaluation_set_url, query_params={"name": evaluation_set}): - yield response + # evaluation_sets resource uses ids instead of names, + # so we have to query by name (which works as a contains filter) and take the first entry with matching name + query_params = {} + if evaluation_set is not None: + query_params["name"] = evaluation_set + + matches = [ + entry + for entry in self.client.get_with_auto_paging(url=evaluation_set_url, query_params=query_params) + if entry["name"] == evaluation_set + ] + if any(matches): + return matches[0] + return None def _get_labels_from_evaluation_set( self, workspace: Optional[str] = None, evaluation_set_id: Optional[str] = None @@ -819,8 +907,7 @@ class FileClient: A client to manage files on deepset Cloud. :param client: deepset Cloud client - :param workspace: workspace in deepset Cloud - + :param workspace: Specifies the name of the workspace for which you want to create the client. 
""" self.client = client self.workspace = workspace @@ -832,6 +919,15 @@ class FileClient: workspace: Optional[str] = None, headers: dict = None, ): + """ + Uploads files to the deepset Cloud workspace. + + :param file_paths: File paths to upload (for example .txt or .pdf files) + :param metas: Metadata of the files to upload + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the FileClient's default workspace is used. + :param headers: Headers to pass to API call + """ workspace_url = self._build_workspace_url(workspace) files_url = f"{workspace_url}/files" if metas is None: @@ -856,6 +952,14 @@ class FileClient: logger.info(f"Successfully uploaded {len(file_ids)} files.") def delete_file(self, file_id: str, workspace: Optional[str] = None, headers: dict = None): + """ + Delete a file from the deepset Cloud workspace. + + :param file_id: The id of the file to be deleted. Use `list_files` to retrieve the id of a file. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the FileClient's default workspace is used. + :param headers: Headers to pass to API call + """ workspace_url = self._build_workspace_url(workspace) file_url = f"{workspace_url}/files/{file_id}" self.client.delete(url=file_url, headers=headers) @@ -868,6 +972,17 @@ class FileClient: workspace: Optional[str] = None, headers: dict = None, ) -> Generator: + """ + List all files in the given deepset Cloud workspace. + You can filter by name or by meta values. + + :param name: The name or part of the name of the file. + :param meta_key: The key of the metadata of the file to be filtered for. + :param meta_value: The value of the metadata of the file to be filtered for. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the FileClient's default workspace is used. 
+ :param headers: Headers to pass to API call + """ workspace_url = self._build_workspace_url(workspace) files_url = f"{workspace_url}/files" query_params = {"name": name, "meta_key": meta_key, "meta_value": meta_value} @@ -880,6 +995,168 @@ class FileClient: return self.client.build_workspace_url(workspace) +class EvaluationRunClient: + def __init__(self, client: DeepsetCloudClient, workspace: Optional[str] = None): + """ + A client to manage deepset Cloud evaluation runs. + + :param client: deepset Cloud client + :param workspace: Specifies the name of the workspace for which you want to create the client. + """ + self.client = client + self.workspace = workspace + + def create_eval_run( + self, + eval_run_name: str, + workspace: Optional[str] = None, + pipeline_config_name: Optional[str] = None, + headers: dict = None, + evaluation_set: Optional[str] = None, + eval_mode: Literal["integrated", "isolated"] = "integrated", + debug: bool = False, + comment: Optional[str] = None, + tags: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Creates an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param pipeline_config_name: The name of the pipeline to evaluate. + :param evaluation_set: The name of the evaluation set to use. + :param eval_mode: The evaluation mode to use. + :param debug: Whether to enable debug output. + :param comment: Comment to add about the evaluation run. + :param tags: Tags to add to the evaluation run. 
+ :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs" + response = self.client.post( + eval_run_url, + json={ + "pipeline_name": pipeline_config_name, + "evaluation_set_name": evaluation_set, + "debug": debug, + "eval_mode": 0 if eval_mode == "integrated" else 1, + "comment": comment, + "name": eval_run_name, + "tags": tags, + }, + headers=headers, + ) + return response.json()["data"] + + def get_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None) -> Dict[str, Any]: + """ + Gets the evaluation run and shows its parameters and metrics. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}" + response = self.client.get(eval_run_url, headers=headers) + return response.json() + + def get_eval_runs(self, workspace: Optional[str] = None, headers: dict = None) -> List[Dict[str, Any]]: + """ + Gets all evaluation runs and shows their parameters and metrics. + + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs" + response = self.client.get_with_auto_paging(eval_run_url, headers=headers) + return [eval_run for eval_run in response] + + def delete_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None): + """ + Deletes an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. 
+ If None, the EvaluationRunClient's default workspace is used. + :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}" + response = self.client.delete(eval_run_url, headers=headers) + if response.status_code == 204: + logger.info(f"Evaluation run '{eval_run_name}' deleted.") + + def start_eval_run(self, eval_run_name: str, workspace: Optional[str] = None, headers: dict = None): + """ + Starts an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}/start" + response = self.client.post(eval_run_url, headers=headers) + if response.status_code == 204: + logger.info(f"Evaluation run '{eval_run_name}' has been started.") + + def update_eval_run( + self, + eval_run_name: str, + workspace: Optional[str] = None, + pipeline_config_name: Optional[str] = None, + headers: dict = None, + evaluation_set: Optional[str] = None, + eval_mode: Literal["integrated", "isolated", None] = None, + debug: Optional[bool] = None, + comment: Optional[str] = None, + tags: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Updates an evaluation run. + + :param eval_run_name: The name of the evaluation run to update. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param pipeline_config_name: The name of the pipeline to evaluate. + :param evaluation_set: The name of the evaluation set to use. + :param eval_mode: The evaluation mode to use. + :param debug: Whether to enable debug output. + :param comment: Comment to add about the evaluation run. 
+ :param tags: Tags to add to the evaluation run. + :param headers: Headers to pass to API call + """ + workspace_url = self._build_workspace_url(workspace) + eval_run_url = f"{workspace_url}/eval_runs/{eval_run_name}" + eval_mode_param = None + if eval_mode is not None: + eval_mode_param = 0 if eval_mode == "integrated" else 1 + response = self.client.patch( + eval_run_url, + json={ + "pipeline_name": pipeline_config_name, + "evaluation_set_name": evaluation_set, + "debug": debug, + "eval_mode": eval_mode_param, + "comment": comment, + "tags": tags, + }, + headers=headers, + ) + return response.json()["data"] + + def _build_workspace_url(self, workspace: Optional[str] = None): + if workspace is None: + workspace = self.workspace + return self.client.build_workspace_url(workspace) + + class DeepsetCloud: """ A facade to communicate with deepset Cloud. @@ -897,10 +1174,11 @@ class DeepsetCloud: Creates a client to communicate with deepset Cloud indexes. :param api_key: Secret value of the API key. - If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. :param api_endpoint: The URL of the deepset Cloud API. - If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable. - :param workspace: workspace in deepset Cloud + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + :param workspace: Specifies the name of the workspace for which you want to create the client. :param index: index in deepset Cloud workspace """ @@ -919,10 +1197,11 @@ class DeepsetCloud: Creates a client to communicate with deepset Cloud pipelines. :param api_key: Secret value of the API key. - If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. 
:param api_endpoint: The URL of the deepset Cloud API. - If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable. - :param workspace: workspace in deepset Cloud + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + :param workspace: Specifies the name of the workspace for which you want to create the client. :param pipeline_config_name: name of the pipeline_config in deepset Cloud workspace """ @@ -941,16 +1220,35 @@ class DeepsetCloud: Creates a client to communicate with deepset Cloud labels. :param api_key: Secret value of the API key. - If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. :param api_endpoint: The URL of the deepset Cloud API. - If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable. - :param workspace: workspace in deepset Cloud + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + :param workspace: Specifies the name of the workspace for which you want to create the client. :param evaluation_set: name of the evaluation set in deepset Cloud """ client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint) return EvaluationSetClient(client=client, workspace=workspace, evaluation_set=evaluation_set) + @classmethod + def get_eval_run_client( + cls, api_key: Optional[str] = None, api_endpoint: Optional[str] = None, workspace: str = "default" + ) -> EvaluationRunClient: + """ + Creates a client to manage evaluation runs on deepset Cloud. + + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. 
+ If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + :param workspace: Specifies the name of the workspace for which you want to create the client. + + """ + client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint) + return EvaluationRunClient(client=client, workspace=workspace) + @classmethod def get_file_client( cls, api_key: Optional[str] = None, api_endpoint: Optional[str] = None, workspace: str = "default" @@ -959,11 +1257,341 @@ class DeepsetCloud: Creates a client to manage files on deepset Cloud. :param api_key: Secret value of the API key. - If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. :param api_endpoint: The URL of the deepset Cloud API. - If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable. - :param workspace: workspace in deepset Cloud + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + :param workspace: Specifies the name of the workspace for which you want to create the client. """ client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint) return FileClient(client=client, workspace=workspace) + + +class DeepsetCloudExperiments: + """ + A facade to conduct and manage experiments within deepset Cloud. + + To start a new experiment run: + 1. Choose a pipeline to evaluate using `list_pipelines()`. + 2. Choose an evaluation set using `list_evaluation_sets()`. + 3. Create and start a new run using `create_and_start_run()`. + 4. Track the run using `get_run()`. 
+ """ + + @classmethod + def list_pipelines( + cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None + ) -> List[dict]: + """ + Lists all pipelines available on deepset Cloud. + + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + + Returns: + list of dictionaries: List[dict] + each dictionary: { + "name": str -> `pipeline_config_name` to be used in `load_from_deepset_cloud()`, + "..." -> additional pipeline meta information + } + example: + [{'name': 'my_super_nice_pipeline_config', + 'pipeline_id': '2184e0c1-c6ec-40a1-9b28-5d2768e5efa2', + 'status': 'DEPLOYED', + 'created_at': '2022-02-01T09:57:03.803991+00:00', + 'deleted': False, + 'is_default': False, + 'indexing': {'status': 'IN_PROGRESS', + 'pending_file_count': 3, + 'total_file_count': 31}}] + """ + client = DeepsetCloud.get_pipeline_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + pipeline_config_infos = list(client.list_pipeline_configs()) + return pipeline_config_infos + + @classmethod + def list_evaluation_sets( + cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None + ) -> List[dict]: + """ + Lists all evaluation sets available on deepset Cloud. + + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. 
+ If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + + Returns: + list of dictionaries: List[dict] + each dictionary: { + "name": str -> `evaluation_set` to be used in `create_run()`, + "..." -> additional pipeline meta information + } + example: + [{'evaluation_set_id': 'fb084729-57ad-4b57-9f78-ec0eb4d29c9f', + 'name': 'my-question-answering-evaluation-set', + 'created_at': '2022-05-06T09:54:14.830529+00:00', + 'matched_labels': 234, + 'total_labels': 234}] + """ + client = DeepsetCloud.get_evaluation_set_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + return client.get_evaluation_sets() + + @classmethod + def get_runs( + cls, workspace: str = "default", api_key: Optional[str] = None, api_endpoint: Optional[str] = None + ) -> List[dict]: + """ + Gets all evaluation runs. + + :param workspace: Specifies the name of the workspace on deepset Cloud. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. 
+ + Returns: + list of dictionaries: List[dict] + example: + [{'eval_run_name': 'my-eval-run-1', + 'parameters': { + 'pipeline_name': 'my-pipeline-1_696bc5d0-ee65-46c1-a308-059507bc353b', + 'evaluation_set_name': 'my-eval-set-name', + 'debug': False, + 'eval_mode': 0 + }, + 'metrics': { + 'isolated_exact_match': 0.45, + 'isolated_f1': 0.89, + 'isolated_sas': 0.91, + 'integrated_exact_match': 0.39, + 'integrated_f1': 0.76, + 'integrated_sas': 0.78, + 'mean_reciprocal_rank': 0.77, + 'mean_average_precision': 0.78, + 'recall_single_hit': 0.91, + 'recall_multi_hit': 0.91, + 'normal_discounted_cummulative_gain': 0.83, + 'precision': 0.52 + }, + 'logs': {}, + 'status': 1, + 'eval_mode': 0, + 'eval_run_labels': [], + 'created_at': '2022-05-24T12:13:16.445857+00:00', + 'comment': 'This is a comment about this eval run', + 'tags': ['experiment-1', 'experiment-2', 'experiment-3'] + }] + """ + client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + return client.get_eval_runs() + + @classmethod + def create_run( + cls, + eval_run_name: str, + workspace: str = "default", + api_key: Optional[str] = None, + api_endpoint: Optional[str] = None, + pipeline_config_name: Optional[str] = None, + evaluation_set: Optional[str] = None, + eval_mode: Literal["integrated", "isolated"] = "integrated", + debug: bool = False, + comment: Optional[str] = None, + tags: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """ + Creates an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param pipeline_config_name: The name of the pipeline to evaluate. Use `list_pipelines()` to list all available pipelines. + :param evaluation_set: The name of the evaluation set to use. Use `list_evaluation_sets()` to list all available evaluation sets. + :param eval_mode: The evaluation mode to use. 
+        :param debug: Whether to enable debug output.
+        :param comment: Comment to add to the evaluation run.
+        :param tags: Tags to add to the evaluation run.
+        :param api_key: Secret value of the API key.
+                        If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable.
+        :param api_endpoint: The URL of the deepset Cloud API.
+                             If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
+                             If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'.
+        """
+        client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace)
+        return client.create_eval_run(
+            eval_run_name=eval_run_name,
+            pipeline_config_name=pipeline_config_name,
+            evaluation_set=evaluation_set,
+            eval_mode=eval_mode,
+            debug=debug,
+            comment=comment,
+            tags=tags,
+        )
+
+    @classmethod
+    def update_run(
+        cls,
+        eval_run_name: str,
+        workspace: str = "default",
+        api_key: Optional[str] = None,
+        api_endpoint: Optional[str] = None,
+        pipeline_config_name: Optional[str] = None,
+        evaluation_set: Optional[str] = None,
+        eval_mode: Literal["integrated", "isolated"] = "integrated",
+        debug: bool = False,
+        comment: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+    ) -> Dict[str, Any]:
+        """
+        Updates an evaluation run.
+
+        :param eval_run_name: The name of the evaluation run to update.
+        :param workspace: Specifies the name of the workspace on deepset Cloud.
+                          If None, the EvaluationRunClient's default workspace is used.
+        :param pipeline_config_name: The name of the pipeline to evaluate. Use `list_pipelines()` to list all available pipelines.
+        :param evaluation_set: The name of the evaluation set to use. Use `list_evaluation_sets()` to list all available evaluation sets.
+        :param eval_mode: The evaluation mode to use.
+        :param debug: Whether to enable debug output.
+        :param comment: Comment to add to the evaluation run.
+        :param tags: Tags to add to the evaluation run.
+ :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + """ + client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + return client.update_eval_run( + eval_run_name=eval_run_name, + pipeline_config_name=pipeline_config_name, + evaluation_set=evaluation_set, + eval_mode=eval_mode, + debug=debug, + comment=comment, + tags=tags, + ) + + @classmethod + def get_run( + cls, + eval_run_name: str, + workspace: str = "default", + api_key: Optional[str] = None, + api_endpoint: Optional[str] = None, + ) -> Dict[str, Any]: + """ + Gets the evaluation run and shows its parameters and metrics. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + """ + client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + return client.get_eval_run(eval_run_name=eval_run_name) + + @classmethod + def delete_run( + cls, + eval_run_name: str, + workspace: str = "default", + api_key: Optional[str] = None, + api_endpoint: Optional[str] = None, + ): + """ + Deletes an evaluation run. + + :param eval_run_name: The name of the evaluation run. 
+ :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. + """ + client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + return client.delete_eval_run(eval_run_name=eval_run_name) + + @classmethod + def start_run( + cls, + eval_run_name: str, + workspace: str = "default", + api_key: Optional[str] = None, + api_endpoint: Optional[str] = None, + ): + """ + Starts an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. 
+ """ + client = DeepsetCloud.get_eval_run_client(api_key=api_key, api_endpoint=api_endpoint, workspace=workspace) + client.start_eval_run(eval_run_name=eval_run_name) + logger.info("You can check run progess by inspecting the `status` field returned from `get_run()`.") + + @classmethod + def create_and_start_run( + cls, + eval_run_name: str, + workspace: str = "default", + api_key: Optional[str] = None, + api_endpoint: Optional[str] = None, + pipeline_config_name: Optional[str] = None, + evaluation_set: Optional[str] = None, + eval_mode: Literal["integrated", "isolated"] = "integrated", + debug: bool = False, + comment: Optional[str] = None, + tags: Optional[List[str]] = None, + ): + """ + Creates and starts an evaluation run. + + :param eval_run_name: The name of the evaluation run. + :param workspace: Specifies the name of the workspace on deepset Cloud. + If None, the EvaluationRunClient's default workspace is used. + :param pipeline_config_name: The name of the pipeline to evaluate. Use `list_pipelines()` to list all available pipelines. + :param evaluation_set: The name of the evaluation set to use. Use `list_evaluation_sets()` to list all available evaluation sets. + :param eval_mode: The evaluation mode to use. + :param debug: Wheter to enable debug output. + :param comment: Comment to add about to the evaluation run. + :param tags: Tags to add to the evaluation run. + :param api_key: Secret value of the API key. + If not specified, it's read from DEEPSET_CLOUD_API_KEY environment variable. + :param api_endpoint: The URL of the deepset Cloud API. + If not specified, it's read from DEEPSET_CLOUD_API_ENDPOINT environment variable. + If environment variable is not set, defaults to 'https://api.cloud.deepset.ai/api/v1'. 
+ """ + cls.create_run( + eval_run_name=eval_run_name, + workspace=workspace, + api_key=api_key, + api_endpoint=api_endpoint, + pipeline_config_name=pipeline_config_name, + evaluation_set=evaluation_set, + eval_mode=eval_mode, + debug=debug, + comment=comment, + tags=tags, + ) + cls.start_run(eval_run_name=eval_run_name, workspace=workspace, api_key=api_key, api_endpoint=api_endpoint) diff --git a/test/others/test_utils.py b/test/others/test_utils.py index 9b37dd608..3bbc321b4 100644 --- a/test/others/test_utils.py +++ b/test/others/test_utils.py @@ -5,6 +5,7 @@ import pandas as pd from pathlib import Path import responses +from responses import matchers from haystack.utils.deepsetcloud import DeepsetCloud from haystack.utils.preprocessing import convert_files_to_docs, tika_convert_files_to_docs @@ -391,3 +392,505 @@ def test_list_files_on_deepset_cloud(): assert len(files) == 2 assert files[0]["name"] == "sample_pdf_1.pdf" assert files[1]["name"] == "sample_pdf_2.pdf" + + +@pytest.mark.usefixtures(deepset_cloud_fixture.__name__) +@responses.activate +def test_create_eval_run(): + if MOCK_DC: + responses.add( + method=responses.POST, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={"data": {"eval_run_name": "my-eval-run-1"}}, + status=200, + match=[ + matchers.json_params_matcher( + { + "name": "my-eval-run-1", + "pipeline_name": "my-pipeline-1", + "evaluation_set_name": "my-eval-set-1", + "eval_mode": 0, + "comment": "this is my first run", + "debug": False, + "tags": ["my-experiment-1"], + } + ) + ], + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={ + "data": [ + { + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + 
"integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": 1, + } + ], + "has_more": False, + "total": 1, + }, + status=200, + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={ + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": 1, + }, + status=200, + ) + + client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY) + client.create_eval_run( + eval_run_name="my-eval-run-1", + pipeline_config_name="my-pipeline-1", + evaluation_set="my-eval-set-1", + eval_mode="integrated", + comment="this is my first run", + tags=["my-experiment-1"], + ) + + runs = client.get_eval_runs() + assert len(runs) == 1 + assert runs[0]["name"] == "my-eval-run-1" + assert runs[0]["tags"] == ["my-experiment-1"] + assert runs[0]["comment"] == "this is my first run" 
+ assert runs[0]["parameters"]["pipeline_name"] == "my-pipeline-1" + assert runs[0]["parameters"]["evaluation_set_name"] == "my-eval-set-1" + + run = client.get_eval_run("my-eval-run-1") + assert run["name"] == "my-eval-run-1" + assert run["tags"] == ["my-experiment-1"] + assert run["comment"] == "this is my first run" + assert run["parameters"]["pipeline_name"] == "my-pipeline-1" + assert run["parameters"]["evaluation_set_name"] == "my-eval-set-1" + + +@pytest.mark.usefixtures(deepset_cloud_fixture.__name__) +@responses.activate +def test_update_eval_run(): + if MOCK_DC: + responses.add( + method=responses.POST, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={"data": {"eval_run_name": "my-eval-run-1"}}, + status=200, + match=[ + matchers.json_params_matcher( + { + "name": "my-eval-run-1", + "pipeline_name": "my-pipeline-1", + "evaluation_set_name": "my-eval-set-1", + "eval_mode": 0, + "comment": "this is my first run", + "debug": False, + "tags": ["my-experiment-1"], + } + ) + ], + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={ + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": "CREATED", + }, + status=200, + ) + + responses.add( + 
method=responses.PATCH, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={"data": {"eval_run_name": "my-eval-run-1"}}, + status=200, + match=[ + matchers.json_params_matcher( + {"pipeline_name": "my-pipeline-2", "comment": "this is my first run with second pipeline"} + ) + ], + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={ + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run with second pipeline", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-2", + }, + "status": "CREATED", + }, + status=200, + ) + + client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY) + client.create_eval_run( + eval_run_name="my-eval-run-1", + pipeline_config_name="my-pipeline-1", + evaluation_set="my-eval-set-1", + eval_mode="integrated", + comment="this is my first run", + tags=["my-experiment-1"], + ) + + run = client.get_eval_run("my-eval-run-1") + assert run["name"] == "my-eval-run-1" + assert run["tags"] == ["my-experiment-1"] + assert run["comment"] == "this is my first run" + assert run["parameters"]["pipeline_name"] == "my-pipeline-1" + assert run["parameters"]["evaluation_set_name"] == "my-eval-set-1" + + client.update_eval_run( + eval_run_name="my-eval-run-1", + 
pipeline_config_name="my-pipeline-2", + comment="this is my first run with second pipeline", + ) + + run = client.get_eval_run("my-eval-run-1") + assert run["name"] == "my-eval-run-1" + assert run["tags"] == ["my-experiment-1"] + assert run["comment"] == "this is my first run with second pipeline" + assert run["parameters"]["pipeline_name"] == "my-pipeline-2" + assert run["parameters"]["evaluation_set_name"] == "my-eval-set-1" + + +@pytest.mark.usefixtures(deepset_cloud_fixture.__name__) +@responses.activate +def test_start_eval_run(): + if MOCK_DC: + responses.add( + method=responses.POST, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={"data": {"eval_run_name": "my-eval-run-1"}}, + status=200, + match=[ + matchers.json_params_matcher( + { + "name": "my-eval-run-1", + "pipeline_name": "my-pipeline-1", + "evaluation_set_name": "my-eval-set-1", + "eval_mode": 0, + "comment": "this is my first run", + "debug": False, + "tags": ["my-experiment-1"], + } + ) + ], + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={ + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": "CREATED", + }, + status=200, + ) + + responses.add( + method=responses.POST, + 
url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1/start", + json={}, + status=200, + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", + json={ + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": "STARTED", + }, + status=200, + ) + + client = DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY) + client.create_eval_run( + eval_run_name="my-eval-run-1", + pipeline_config_name="my-pipeline-1", + evaluation_set="my-eval-set-1", + eval_mode="integrated", + comment="this is my first run", + tags=["my-experiment-1"], + ) + + run = client.get_eval_run("my-eval-run-1") + assert run["name"] == "my-eval-run-1" + assert run["tags"] == ["my-experiment-1"] + assert run["comment"] == "this is my first run" + assert run["parameters"]["pipeline_name"] == "my-pipeline-1" + assert run["parameters"]["evaluation_set_name"] == "my-eval-set-1" + assert run["status"] == "CREATED" + + client.start_eval_run(eval_run_name="my-eval-run-1") + + run = client.get_eval_run("my-eval-run-1") + assert run["name"] == "my-eval-run-1" + assert run["tags"] == ["my-experiment-1"] + assert run["comment"] == "this is my first run" + assert run["parameters"]["pipeline_name"] == 
"my-pipeline-1" + assert run["parameters"]["evaluation_set_name"] == "my-eval-set-1" + assert run["status"] == "STARTED" + + +@pytest.mark.usefixtures(deepset_cloud_fixture.__name__) +@responses.activate +def test_delete_eval_run(): + if MOCK_DC: + responses.add( + method=responses.POST, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={"data": {"eval_run_name": "my-eval-run-1"}}, + status=200, + match=[ + matchers.json_params_matcher( + { + "name": "my-eval-run-1", + "pipeline_name": "my-pipeline-1", + "evaluation_set_name": "my-eval-set-1", + "eval_mode": 0, + "comment": "this is my first run", + "debug": False, + "tags": ["my-experiment-1"], + } + ) + ], + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={ + "data": [ + { + "created_at": "2022-05-24T12:13:16.445857+00:00", + "eval_mode": 0, + "eval_run_id": "17875c63-7c07-42d8-bb01-4fcd95ce113c", + "name": "my-eval-run-1", + "comment": "this is my first run", + "tags": ["my-experiment-1"], + "eval_run_labels": [], + "logs": {}, + "metrics": { + "integrated_exact_match": None, + "integrated_f1": None, + "integrated_sas": None, + "isolated_exact_match": None, + "isolated_f1": None, + "isolated_sas": None, + "mean_average_precision": None, + "mean_reciprocal_rank": None, + "normal_discounted_cummulative_gain": None, + "precision": None, + "recall_multi_hit": None, + "recall_single_hit": None, + }, + "parameters": { + "debug": False, + "eval_mode": 0, + "evaluation_set_name": "my-eval-set-1", + "pipeline_name": "my-pipeline-1", + }, + "status": 1, + } + ], + "has_more": False, + "total": 1, + }, + status=200, + ) + + responses.add( + method=responses.DELETE, url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs/my-eval-run-1", status=204 + ) + + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/eval_runs", + json={"data": [], "has_more": False, "total": 0}, + status=200, + ) + + client = 
DeepsetCloud.get_eval_run_client(api_endpoint=DC_API_ENDPOINT, api_key=DC_API_KEY) + client.create_eval_run( + eval_run_name="my-eval-run-1", + pipeline_config_name="my-pipeline-1", + evaluation_set="my-eval-set-1", + eval_mode="integrated", + comment="this is my first run", + tags=["my-experiment-1"], + ) + + runs = client.get_eval_runs() + assert len(runs) == 1 + + run = client.delete_eval_run("my-eval-run-1") + + runs = client.get_eval_runs() + assert len(runs) == 0