From 67844f1a538d35238a0838c309bb62e33086d557 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?=
Date: Thu, 5 Jun 2025 14:46:14 +0200
Subject: [PATCH] feat(sdk): add EntityClient.delete documentation and tests (#13688)

---
 .../src/datahub/sdk/entity_client.py          |  41 +++++
 .../tests/unit/sdk_v2/test_entity_client.py   | 142 +++++++++++++++++-
 2 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/sdk/entity_client.py b/metadata-ingestion/src/datahub/sdk/entity_client.py
index ecd1af1040..e2678ef879 100644
--- a/metadata-ingestion/src/datahub/sdk/entity_client.py
+++ b/metadata-ingestion/src/datahub/sdk/entity_client.py
@@ -153,3 +153,44 @@ class EntityClient:
 
         mcps = updater.build()
         self._graph.emit_mcps(mcps)
+
+    def delete(
+        self,
+        urn: UrnOrStr,
+        check_exists: bool = True,
+        cascade: bool = False,
+        hard: bool = False,
+    ) -> None:
+        """Delete an entity by its urn.
+
+        Args:
+            urn: The urn of the entity to delete. Can be a string or :py:class:`Urn` object.
+            check_exists: Whether to check that the entity exists before deleting it. Defaults to True.
+            cascade: Whether to also delete related child entities (datajobs within dataflows,
+                datasets within containers, etc.). Not yet supported: passing True raises SdkUsageError.
+            hard: Whether to perform a hard delete (permanent) or soft delete. Defaults to False.
+
+        Raises:
+            SdkUsageError: If check_exists is True and the entity does not exist, or if cascade is True (not supported).
+
+        Note:
+            When hard is True, the operation is irreversible and the entity will be permanently removed.
+
+            Planned impact of cascade deletion (not implemented yet) depends on the input entity type:
+                - Container: Recursively deletes all containers and data assets within the container.
+                - Dataflow: Recursively deletes all data jobs within the dataflow.
+                - Dashboard: TBD
+                - DataPlatformInstance: TBD
+                - ...
+        """
+        urn_str = str(urn) if isinstance(urn, Urn) else urn
+        if check_exists and not self._graph.exists(entity_urn=urn_str):
+            raise SdkUsageError(
+                f"Entity {urn_str} does not exist, and hence cannot be deleted. "
+                "You can bypass this check by setting check_exists=False."
+            )
+
+        if cascade:
+            raise SdkUsageError("The 'cascade' parameter is not yet supported.")
+
+        self._graph.delete_entity(urn=urn_str, hard=hard)
diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_entity_client.py b/metadata-ingestion/tests/unit/sdk_v2/test_entity_client.py
index a105c446a7..85ab70b023 100644
--- a/metadata-ingestion/tests/unit/sdk_v2/test_entity_client.py
+++ b/metadata-ingestion/tests/unit/sdk_v2/test_entity_client.py
@@ -1,4 +1,6 @@
 import pathlib
+from dataclasses import dataclass
+from typing import Optional, Tuple, Type, Union
 from unittest.mock import Mock
 
 import pytest
@@ -7,7 +9,7 @@ import datahub.metadata.schema_classes as models
 from datahub.emitter.mcp_builder import DatabaseKey, SchemaKey
 from datahub.errors import ItemNotFoundError, SdkUsageError
 from datahub.ingestion.graph.client import DataHubGraph
-from datahub.metadata.urns import DatasetUrn, TagUrn
+from datahub.metadata.urns import DatasetUrn, TagUrn, Urn
 from datahub.sdk.container import Container
 from datahub.sdk.dataset import Dataset
 from datahub.sdk.main_client import DataHubClient
@@ -141,3 +143,141 @@ def test_get_nonexistent_dataset_fails(client: DataHubClient, mock_graph: Mock)
 
     with pytest.raises(ItemNotFoundError, match="Entity .* not found"):
         client.entities.get(dataset_urn)
+
+
+@dataclass
+class EntityClientDeleteTestParams:
+    """One scenario for the delete method: call arguments, mocked graph state and expected outcome."""
+
+    urn: Union[str, Urn]  # urn handed to delete(), as a string or a Urn object
+    check_exists: bool = True  # forwarded to delete()
+    cascade: bool = False  # forwarded to delete()
+    hard: bool = False  # forwarded to delete()
+    entity_exists: bool = True  # return value set on the mocked graph.exists()
+    expected_exception: Optional[Type[Exception]] = None  # None means success
+    expected_graph_exists_call: bool = True  # True: exists() called exactly once
+    expected_delete_call: Optional[Tuple[str, bool]] = None  # (urn, hard) or no call
+
+
+@pytest.mark.parametrize(
+    "params",
+    [
+        pytest.param(
+            EntityClientDeleteTestParams(
+                urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                check_exists=True,
+                cascade=False,
+                hard=False,
+                entity_exists=True,
+                expected_exception=None,
+                expected_graph_exists_call=True,
+                expected_delete_call=(
+                    "urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                    False,
+                ),
+            ),
+            id="successful_soft_delete_with_exists_check",
+        ),
+        pytest.param(
+            EntityClientDeleteTestParams(
+                urn=DatasetUrn(platform="snowflake", name="test.table", env="prod"),
+                check_exists=True,
+                cascade=False,
+                hard=True,
+                entity_exists=True,
+                expected_exception=None,
+                expected_graph_exists_call=True,
+                expected_delete_call=(
+                    "urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                    True,
+                ),
+            ),
+            id="successful_hard_delete_with_urn_object",
+        ),
+        pytest.param(
+            EntityClientDeleteTestParams(
+                urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                check_exists=False,
+                cascade=False,
+                hard=False,
+                entity_exists=False,
+                expected_exception=None,
+                expected_graph_exists_call=False,
+                expected_delete_call=(
+                    "urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                    False,
+                ),
+            ),
+            id="delete_without_exists_check",
+        ),
+        pytest.param(
+            EntityClientDeleteTestParams(
+                urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                check_exists=True,
+                cascade=False,
+                hard=False,
+                entity_exists=False,
+                expected_exception=SdkUsageError,
+                expected_graph_exists_call=True,
+                expected_delete_call=None,
+            ),
+            id="delete_nonexistent_entity_with_check",
+        ),
+        pytest.param(
+            EntityClientDeleteTestParams(
+                urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,test.table,PROD)",
+                check_exists=True,
+                cascade=True,
+                hard=False,
+                entity_exists=True,
+                expected_exception=SdkUsageError,
+                expected_graph_exists_call=True,
+                expected_delete_call=None,
+            ),
+            id="cascade_delete_not_supported",
+        ),
+    ],
+)
+def test_delete_entity(
+    client: DataHubClient,
+    mock_graph: Mock,
+    params: EntityClientDeleteTestParams,
+) -> None:
+    """Run one delete scenario and check the raised exception and the calls made on the mocked graph."""
+    # Configure the mocked graph: exists() answers per scenario, delete_entity is a fresh Mock
+    mock_graph.exists.return_value = params.entity_exists
+    mock_graph.delete_entity = Mock()
+
+    if params.expected_exception:
+        # Failure scenario: delete() must raise the expected exception type
+        with pytest.raises(params.expected_exception):
+            client.entities.delete(
+                urn=params.urn,
+                check_exists=params.check_exists,
+                cascade=params.cascade,
+                hard=params.hard,
+            )
+    else:
+        # Success scenario: delete() must return without raising
+        client.entities.delete(
+            urn=params.urn,
+            check_exists=params.check_exists,
+            cascade=params.cascade,
+            hard=params.hard,
+        )
+
+    # exists() must be called exactly once with the string form of the urn, or not at all
+    if params.expected_graph_exists_call:
+        expected_urn_str = str(params.urn)
+        mock_graph.exists.assert_called_once_with(entity_urn=expected_urn_str)
+    else:
+        mock_graph.exists.assert_not_called()
+
+    # delete_entity() must be called exactly once with the expected (urn, hard), or not at all
+    if params.expected_delete_call:
+        expected_urn_str, expected_hard = params.expected_delete_call
+        mock_graph.delete_entity.assert_called_once_with(
+            urn=expected_urn_str, hard=expected_hard
+        )
+    else:
+        mock_graph.delete_entity.assert_not_called()