From 0f69e96078b5f75a7f1280c57957401e13cf4bf8 Mon Sep 17 00:00:00 2001 From: Anush Kumar Date: Tue, 23 Sep 2025 07:07:52 -0700 Subject: [PATCH] feat(sdk): Added support for Change Audit Stamps in Dashboard and Chart entities (#14815) --- .../ingestion/source/looker/looker_source.py | 55 ++++- metadata-ingestion/src/datahub/sdk/_shared.py | 126 +++++++++++ metadata-ingestion/src/datahub/sdk/chart.py | 117 +++++++--- .../src/datahub/sdk/dashboard.py | 109 ++++++--- .../test_chart_audit_stamps_golden.json | 92 ++++++++ .../test_dashboard_audit_stamps_golden.json | 84 +++++++ .../tests/unit/sdk_v2/test_chart.py | 128 +++++++++++ .../tests/unit/sdk_v2/test_dashboard.py | 211 ++++++++++++++++++ 8 files changed, 858 insertions(+), 64 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sdk_v2/chart_golden/test_chart_audit_stamps_golden.json create mode 100644 metadata-ingestion/tests/unit/sdk_v2/dashboard_golden/test_dashboard_audit_stamps_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index b23f2ee17a..d6026b330c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -736,7 +736,16 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase): display_name=dashboard_element.title, # title is (deprecated) using display_name extra_aspects=chart_extra_aspects, input_datasets=dashboard_element.get_view_urns(self.source_config), - last_modified=self._get_last_modified_time(dashboard), + last_modified=self._get_last_modified_time( + dashboard + ), # Inherited from Dashboard + last_modified_by=self._get_last_modified_by( + dashboard + ), # Inherited from Dashboard + created_at=self._get_created_at(dashboard), # Inherited from Dashboard + created_by=self._get_created_by(dashboard), # Inherited from Dashboard + 
deleted_on=self._get_deleted_on(dashboard), # Inherited from Dashboard + deleted_by=self._get_deleted_by(dashboard), # Inherited from Dashboard name=dashboard_element.get_urn_element_id(), owners=chart_ownership, parent_container=chart_parent_container, @@ -803,6 +812,11 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase): display_name=looker_dashboard.title, # title is (deprecated) using display_name extra_aspects=dashboard_extra_aspects, last_modified=self._get_last_modified_time(looker_dashboard), + last_modified_by=self._get_last_modified_by(looker_dashboard), + created_at=self._get_created_at(looker_dashboard), + created_by=self._get_created_by(looker_dashboard), + deleted_on=self._get_deleted_on(looker_dashboard), + deleted_by=self._get_deleted_by(looker_dashboard), name=looker_dashboard.get_urn_dashboard_id(), owners=dashboard_ownership, parent_container=dashboard_parent_container, @@ -988,9 +1002,44 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase): def _get_last_modified_time( self, looker_dashboard: Optional[LookerDashboard] ) -> Optional[datetime.datetime]: - if looker_dashboard is None: + return looker_dashboard.last_updated_at if looker_dashboard else None + + def _get_last_modified_by( + self, looker_dashboard: Optional[LookerDashboard] + ) -> Optional[str]: + if not looker_dashboard or not looker_dashboard.last_updated_by: return None - return looker_dashboard.last_updated_at + return looker_dashboard.last_updated_by.get_urn( + self.source_config.strip_user_ids_from_email + ) + + def _get_created_at( + self, looker_dashboard: Optional[LookerDashboard] + ) -> Optional[datetime.datetime]: + return looker_dashboard.created_at if looker_dashboard else None + + def _get_created_by( + self, looker_dashboard: Optional[LookerDashboard] + ) -> Optional[str]: + if not looker_dashboard or not looker_dashboard.owner: + return None + return looker_dashboard.owner.get_urn( + 
self.source_config.strip_user_ids_from_email + ) + + def _get_deleted_on( + self, looker_dashboard: Optional[LookerDashboard] + ) -> Optional[datetime.datetime]: + return looker_dashboard.deleted_at if looker_dashboard else None + + def _get_deleted_by( + self, looker_dashboard: Optional[LookerDashboard] + ) -> Optional[str]: + if not looker_dashboard or not looker_dashboard.deleted_by: + return None + return looker_dashboard.deleted_by.get_urn( + self.source_config.strip_user_ids_from_email + ) def _get_looker_folder(self, folder: Union[Folder, FolderBase]) -> LookerFolder: assert folder.id diff --git a/metadata-ingestion/src/datahub/sdk/_shared.py b/metadata-ingestion/src/datahub/sdk/_shared.py index a1cbc775e1..4bdcd77838 100644 --- a/metadata-ingestion/src/datahub/sdk/_shared.py +++ b/metadata-ingestion/src/datahub/sdk/_shared.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from abc import ABC, abstractmethod from datetime import datetime from typing import ( TYPE_CHECKING, @@ -61,6 +62,7 @@ DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn] DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn] ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn] +ActorUrnOrStr: TypeAlias = Union[str, ActorUrn] StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn] StructuredPropertyValueType: TypeAlias = Union[str, float, int] StructuredPropertyInputType: TypeAlias = Dict[ @@ -110,6 +112,130 @@ def parse_time_stamp(ts: Optional[models.TimeStampClass]) -> Optional[datetime]: return parse_ts_millis(ts.time) +class ChangeAuditStampsMixin(ABC): + """Mixin class for managing audit stamps on entities.""" + + __slots__ = () + + @abstractmethod + def _get_audit_stamps(self) -> models.ChangeAuditStampsClass: + """Get the audit stamps from the entity properties.""" + pass + + @abstractmethod + def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None: + """Set the audit stamps on the 
entity properties.""" + pass + + @property + def last_modified(self) -> Optional[datetime]: + """Get the last modification timestamp from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.lastModified.time == 0: + return None + return datetime.fromtimestamp( + audit_stamps.lastModified.time / 1000 + ) # supports only seconds precision + + def set_last_modified(self, last_modified: datetime) -> None: + """Set the last modification timestamp in audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + audit_stamps.lastModified.time = make_ts_millis(last_modified) + self._set_audit_stamps(audit_stamps) + + @property + def last_modified_by(self) -> Optional[str]: + """Get the last modification actor from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.lastModified.actor == builder.UNKNOWN_USER: + return None + return audit_stamps.lastModified.actor + + def set_last_modified_by(self, last_modified_by: ActorUrnOrStr) -> None: + """Set the last modification actor in audit stamps.""" + if isinstance(last_modified_by, str): + last_modified_by = make_user_urn(last_modified_by) + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + audit_stamps.lastModified.actor = str(last_modified_by) + self._set_audit_stamps(audit_stamps) + + @property + def created_at(self) -> Optional[datetime]: + """Get the creation timestamp from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.created.time == 0: + return None + return datetime.fromtimestamp( + audit_stamps.created.time / 1000 + ) # supports only seconds precision + + def set_created_at(self, created_at: datetime) -> None: + """Set the creation timestamp in audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + audit_stamps.created.time = make_ts_millis(created_at) + 
self._set_audit_stamps(audit_stamps) + + @property + def created_by(self) -> Optional[ActorUrnOrStr]: + """Get the creation actor from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.created.actor == builder.UNKNOWN_USER: + return None + return audit_stamps.created.actor + + def set_created_by(self, created_by: ActorUrnOrStr) -> None: + """Set the creation actor in audit stamps.""" + if isinstance(created_by, str): + created_by = make_user_urn(created_by) + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + audit_stamps.created.actor = str(created_by) + self._set_audit_stamps(audit_stamps) + + @property + def deleted_on(self) -> Optional[datetime]: + """Get the deletion timestamp from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.deleted is None or audit_stamps.deleted.time == 0: + return None + return datetime.fromtimestamp( + audit_stamps.deleted.time / 1000 + ) # supports only seconds precision + + def set_deleted_on(self, deleted_on: datetime) -> None: + """Set the deletion timestamp in audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + # Default constructor sets deleted to None + if audit_stamps.deleted is None: + audit_stamps.deleted = models.AuditStampClass( + time=0, actor=builder.UNKNOWN_USER + ) + audit_stamps.deleted.time = make_ts_millis(deleted_on) + self._set_audit_stamps(audit_stamps) + + @property + def deleted_by(self) -> Optional[ActorUrnOrStr]: + """Get the deletion actor from audit stamps.""" + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if ( + audit_stamps.deleted is None + or audit_stamps.deleted.actor == builder.UNKNOWN_USER + ): + return None + return audit_stamps.deleted.actor + + def set_deleted_by(self, deleted_by: ActorUrnOrStr) -> None: + """Set the deletion actor in audit stamps.""" + if isinstance(deleted_by, str): + 
deleted_by = make_user_urn(deleted_by) + audit_stamps: models.ChangeAuditStampsClass = self._get_audit_stamps() + if audit_stamps.deleted is None: + audit_stamps.deleted = models.AuditStampClass( + time=0, actor=builder.UNKNOWN_USER + ) + audit_stamps.deleted.actor = str(deleted_by) + self._set_audit_stamps(audit_stamps) + + class HasPlatformInstance(Entity): __slots__ = () diff --git a/metadata-ingestion/src/datahub/sdk/chart.py b/metadata-ingestion/src/datahub/sdk/chart.py index 7fca122595..37cb132353 100644 --- a/metadata-ingestion/src/datahub/sdk/chart.py +++ b/metadata-ingestion/src/datahub/sdk/chart.py @@ -10,6 +10,8 @@ import datahub.metadata.schema_classes as models from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.urns import ChartUrn, DatasetUrn, Urn from datahub.sdk._shared import ( + ActorUrnOrStr, + ChangeAuditStampsMixin, DataPlatformInstanceUrnOrStr, DataPlatformUrnOrStr, DatasetUrnOrStr, @@ -34,6 +36,7 @@ from datahub.utilities.sentinels import Unset, unset class Chart( + ChangeAuditStampsMixin, HasPlatformInstance, HasSubtype, HasOwnership, @@ -70,6 +73,11 @@ class Chart( chart_url: Optional[str] = None, custom_properties: Optional[Dict[str, str]] = None, last_modified: Optional[datetime] = None, + last_modified_by: Optional[ActorUrnOrStr] = None, + created_at: Optional[datetime] = None, + created_by: Optional[ActorUrnOrStr] = None, + deleted_on: Optional[datetime] = None, + deleted_by: Optional[ActorUrnOrStr] = None, last_refreshed: Optional[datetime] = None, chart_type: Optional[Union[str, models.ChartTypeClass]] = None, access: Optional[str] = None, @@ -94,13 +102,60 @@ class Chart( self._set_extra_aspects(extra_aspects) self._set_platform_instance(platform, platform_instance) - self._ensure_chart_props(display_name=display_name) + self._init_chart_properties( + description, + display_name, + external_url, + chart_url, + custom_properties, + last_modified, + last_modified_by, + created_at, + created_by, + 
last_refreshed, + deleted_on, + deleted_by, + chart_type, + access, + input_datasets, + ) + self._init_standard_aspects( + parent_container, subtype, owners, links, tags, terms, domain + ) - if display_name is not None: - self.set_display_name(display_name) + @classmethod + def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self: + assert isinstance(urn, ChartUrn) + entity = cls( + platform=urn.dashboard_tool, + name=urn.chart_id, + ) + return entity._init_from_graph(current_aspects) + + def _init_chart_properties( + self, + description: Optional[str], + display_name: Optional[str], + external_url: Optional[str], + chart_url: Optional[str], + custom_properties: Optional[Dict[str, str]], + last_modified: Optional[datetime], + last_modified_by: Optional[ActorUrnOrStr], + created_at: Optional[datetime], + created_by: Optional[ActorUrnOrStr], + last_refreshed: Optional[datetime], + deleted_on: Optional[datetime], + deleted_by: Optional[ActorUrnOrStr], + chart_type: Optional[Union[str, models.ChartTypeClass]], + access: Optional[str], + input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]], + ) -> None: + """Initialize chart-specific properties.""" if description is not None: self.set_description(description) + if display_name is not None: + self.set_display_name(display_name) if external_url is not None: self.set_external_url(external_url) if chart_url is not None: @@ -109,6 +164,16 @@ class Chart( self.set_custom_properties(custom_properties) if last_modified is not None: self.set_last_modified(last_modified) + if last_modified_by is not None: + self.set_last_modified_by(last_modified_by) + if created_at is not None: + self.set_created_at(created_at) + if created_by is not None: + self.set_created_by(created_by) + if deleted_on is not None: + self.set_deleted_on(deleted_on) + if deleted_by is not None: + self.set_deleted_by(deleted_by) if last_refreshed is not None: self.set_last_refreshed(last_refreshed) if chart_type is not None: 
@@ -118,6 +183,17 @@ class Chart( if input_datasets is not None: self.set_input_datasets(input_datasets) + def _init_standard_aspects( + self, + parent_container: ParentContainerInputType | Unset, + subtype: Optional[str], + owners: Optional[OwnersInputType], + links: Optional[LinksInputType], + tags: Optional[TagsInputType], + terms: Optional[TermsInputType], + domain: Optional[DomainInputType], + ) -> None: + """Initialize standard aspects.""" if parent_container is not unset: self._set_container(parent_container) if subtype is not None: @@ -133,15 +209,6 @@ class Chart( if domain is not None: self.set_domain(domain) - @classmethod - def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self: - assert isinstance(urn, ChartUrn) - entity = cls( - platform=urn.dashboard_tool, - name=urn.chart_id, - ) - return entity._init_from_graph(current_aspects) - @property def urn(self) -> ChartUrn: assert isinstance(self._urn, ChartUrn) @@ -159,6 +226,14 @@ class Chart( ) ) + def _get_audit_stamps(self) -> models.ChangeAuditStampsClass: + """Get the audit stamps from the chart properties.""" + return self._ensure_chart_props().lastModified + + def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None: + """Set the audit stamps on the chart properties.""" + self._ensure_chart_props().lastModified = audit_stamps + @property def name(self) -> str: """Get the name of the chart.""" @@ -220,24 +295,6 @@ class Chart( """Set the custom properties of the chart.""" self._ensure_chart_props().customProperties = custom_properties - @property - def last_modified(self) -> Optional[datetime]: - """Get the last modification timestamp of the chart.""" - last_modified_time = self._ensure_chart_props().lastModified.lastModified.time - if not last_modified_time: - return None - return datetime.fromtimestamp(last_modified_time) - - def set_last_modified(self, last_modified: datetime) -> None: - """Set the last modification timestamp of the chart.""" - 
chart_props = self._ensure_chart_props() - chart_props.lastModified = models.ChangeAuditStampsClass( - lastModified=models.AuditStampClass( - time=int(last_modified.timestamp()), - actor="urn:li:corpuser:datahub", - ) - ) - @property def last_refreshed(self) -> Optional[datetime]: """Get the last refresh timestamp of the chart.""" diff --git a/metadata-ingestion/src/datahub/sdk/dashboard.py b/metadata-ingestion/src/datahub/sdk/dashboard.py index 00b656e1bf..420ab1f7cc 100644 --- a/metadata-ingestion/src/datahub/sdk/dashboard.py +++ b/metadata-ingestion/src/datahub/sdk/dashboard.py @@ -9,6 +9,8 @@ from typing_extensions import Self import datahub.metadata.schema_classes as models from datahub.metadata.urns import ChartUrn, DashboardUrn, DatasetUrn, Urn from datahub.sdk._shared import ( + ActorUrnOrStr, + ChangeAuditStampsMixin, ChartUrnOrStr, DashboardUrnOrStr, DataPlatformInstanceUrnOrStr, @@ -36,6 +38,7 @@ from datahub.utilities.sentinels import Unset, unset class Dashboard( + ChangeAuditStampsMixin, HasPlatformInstance, HasSubtype, HasOwnership, @@ -72,6 +75,11 @@ class Dashboard( dashboard_url: Optional[str] = None, custom_properties: Optional[Dict[str, str]] = None, last_modified: Optional[datetime] = None, + last_modified_by: Optional[ActorUrnOrStr] = None, + created_at: Optional[datetime] = None, + created_by: Optional[ActorUrnOrStr] = None, + deleted_on: Optional[datetime] = None, + deleted_by: Optional[ActorUrnOrStr] = None, last_refreshed: Optional[datetime] = None, input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]] = None, charts: Optional[Sequence[Union[ChartUrnOrStr, Chart]]] = None, @@ -96,17 +104,48 @@ class Dashboard( self._set_extra_aspects(extra_aspects) self._set_platform_instance(platform, platform_instance) + self._ensure_dashboard_props(display_name=display_name) - # Initialize DashboardInfoClass with default values - dashboard_info = self._ensure_dashboard_props(display_name=display_name) - if last_modified: - 
dashboard_info.lastModified = models.ChangeAuditStampsClass( - lastModified=models.AuditStampClass( - time=int(last_modified.timestamp()), - actor="urn:li:corpuser:datahub", - ), - ) + self._init_dashboard_properties( + description, + display_name, + external_url, + dashboard_url, + custom_properties, + last_modified, + last_modified_by, + created_at, + created_by, + last_refreshed, + deleted_on, + deleted_by, + input_datasets, + charts, + dashboards, + ) + self._init_standard_aspects( + parent_container, subtype, owners, links, tags, terms, domain + ) + def _init_dashboard_properties( + self, + description: Optional[str], + display_name: Optional[str], + external_url: Optional[str], + dashboard_url: Optional[str], + custom_properties: Optional[Dict[str, str]], + last_modified: Optional[datetime], + last_modified_by: Optional[ActorUrnOrStr], + created_at: Optional[datetime], + created_by: Optional[ActorUrnOrStr], + last_refreshed: Optional[datetime], + deleted_on: Optional[datetime], + deleted_by: Optional[ActorUrnOrStr], + input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]], + charts: Optional[Sequence[Union[ChartUrnOrStr, Chart]]], + dashboards: Optional[Sequence[Union[DashboardUrnOrStr, Dashboard]]], + ) -> None: + """Initialize dashboard-specific properties.""" if description is not None: self.set_description(description) if display_name is not None: @@ -119,6 +158,16 @@ class Dashboard( self.set_custom_properties(custom_properties) if last_modified is not None: self.set_last_modified(last_modified) + if last_modified_by is not None: + self.set_last_modified_by(last_modified_by) + if created_at is not None: + self.set_created_at(created_at) + if created_by is not None: + self.set_created_by(created_by) + if deleted_on is not None: + self.set_deleted_on(deleted_on) + if deleted_by is not None: + self.set_deleted_by(deleted_by) if last_refreshed is not None: self.set_last_refreshed(last_refreshed) if input_datasets is not None: @@ -128,6 +177,17 
@@ class Dashboard( if dashboards is not None: self.set_dashboards(dashboards) + def _init_standard_aspects( + self, + parent_container: ParentContainerInputType | Unset, + subtype: Optional[str], + owners: Optional[OwnersInputType], + links: Optional[LinksInputType], + tags: Optional[TagsInputType], + terms: Optional[TermsInputType], + domain: Optional[DomainInputType], + ) -> None: + """Initialize standard aspects.""" if parent_container is not unset: self._set_container(parent_container) if subtype is not None: @@ -165,16 +225,20 @@ class Dashboard( models.DashboardInfoClass( title=display_name or self.urn.dashboard_id, description="", - lastModified=models.ChangeAuditStampsClass( - lastModified=models.AuditStampClass( - time=0, actor="urn:li:corpuser:unknown" - ) - ), + lastModified=models.ChangeAuditStampsClass(), customProperties={}, dashboards=[], ) ) + def _get_audit_stamps(self) -> models.ChangeAuditStampsClass: + """Get the audit stamps from the dashboard properties.""" + return self._ensure_dashboard_props().lastModified + + def _set_audit_stamps(self, audit_stamps: models.ChangeAuditStampsClass) -> None: + """Set the audit stamps on the dashboard properties.""" + self._ensure_dashboard_props().lastModified = audit_stamps + @property def name(self) -> str: """Get the name of the dashboard.""" @@ -238,23 +302,6 @@ class Dashboard( """Set the custom properties of the dashboard.""" self._ensure_dashboard_props().customProperties = custom_properties - @property - def last_modified(self) -> Optional[datetime]: - """Get the last modification timestamp of the dashboard.""" - props = self._ensure_dashboard_props() - if props.lastModified.lastModified.time == 0: - return None - return datetime.fromtimestamp(props.lastModified.lastModified.time) - - def set_last_modified(self, last_modified: datetime) -> None: - """Set the last modification timestamp of the dashboard.""" - self._ensure_dashboard_props().lastModified = models.ChangeAuditStampsClass( - 
lastModified=models.AuditStampClass( - time=int(last_modified.timestamp()), - actor="urn:li:corpuser:datahub", - ), - ) - @property def last_refreshed(self) -> Optional[datetime]: """Get the last refresh timestamp of the dashboard.""" diff --git a/metadata-ingestion/tests/unit/sdk_v2/chart_golden/test_chart_audit_stamps_golden.json b/metadata-ingestion/tests/unit/sdk_v2/chart_golden/test_chart_audit_stamps_golden.json new file mode 100644 index 0000000000..7842c75aa1 --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk_v2/chart_golden/test_chart_audit_stamps_golden.json @@ -0,0 +1,92 @@ +[ +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,audit_golden_chart)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,audit_golden_chart)", + "changeType": "UPSERT", + "aspectName": "chartInfo", + "aspect": { + "json": { + "customProperties": { + "audit_test": "true", + "chart_type": "line" + }, + "title": "Audit Golden Chart", + "description": "Testing chart audit stamps with golden files", + "lastModified": { + "created": { + "time": 1672596000000, + "actor": "urn:li:corpuser:creator@example.com" + }, + "lastModified": { + "time": 1672698600000, + "actor": "urn:li:corpuser:modifier@example.com" + }, + "deleted": { + "time": 1672793100000, + "actor": "urn:li:corpuser:deleter@example.com" + } + }, + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_data,PROD)" + }, + { + "string": "urn:li:dataset:(urn:li:dataPlatform:snowflake,user_data,PROD)" + } + ], + "type": "LINE", + "access": "PRIVATE" + } + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,audit_golden_chart)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:owner@example.com", + "type": "TECHNICAL_OWNER" + } + ], + 
"ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,audit_golden_chart)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:audit" + }, + { + "tag": "urn:li:tag:chart" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sdk_v2/dashboard_golden/test_dashboard_audit_stamps_golden.json b/metadata-ingestion/tests/unit/sdk_v2/dashboard_golden/test_dashboard_audit_stamps_golden.json new file mode 100644 index 0000000000..2d46db7e46 --- /dev/null +++ b/metadata-ingestion/tests/unit/sdk_v2/dashboard_golden/test_dashboard_audit_stamps_golden.json @@ -0,0 +1,84 @@ +[ +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,audit_golden_dashboard)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,audit_golden_dashboard)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "audit_test": "true" + }, + "title": "Audit Golden Test", + "description": "Testing audit stamps with golden files", + "charts": [], + "datasets": [], + "dashboards": [], + "lastModified": { + "created": { + "time": 1672596000000, + "actor": "urn:li:corpuser:creator@example.com" + }, + "lastModified": { + "time": 1672698600000, + "actor": "urn:li:corpuser:modifier@example.com" + }, + "deleted": { + "time": 1672793100000, + "actor": "urn:li:corpuser:deleter@example.com" + } + } + } + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,audit_golden_dashboard)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:owner@example.com", + "type": 
"TECHNICAL_OWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,audit_golden_dashboard)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:audit" + }, + { + "tag": "urn:li:tag:test" + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_chart.py b/metadata-ingestion/tests/unit/sdk_v2/test_chart.py index ecd526baf9..0a70bc642a 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/test_chart.py +++ b/metadata-ingestion/tests/unit/sdk_v2/test_chart.py @@ -243,3 +243,131 @@ def test_chart_set_chart_type() -> None: assert c.chart_type == models.ChartTypeClass.BAR with pytest.raises(ValueError, match=r"Invalid chart type:.*"): c.set_chart_type("invalid_type") + + +def test_chart_audit_stamps_integration() -> None: + """Test HasAuditStamps mixin integration with Chart-specific functionality.""" + created_time = datetime(2023, 1, 1, 10, 0, 0) + modified_time = datetime(2023, 1, 2, 14, 30, 0) + deleted_time = datetime(2023, 1, 3, 16, 45, 0) + + # Test initialization with audit stamps and chart-specific properties + c = Chart( + platform="looker", + name="audit_integration_chart", + display_name="Audit Integration Chart", + description="Testing audit stamps with chart features", + created_at=created_time, + created_by="creator@example.com", + last_modified=modified_time, + last_modified_by="modifier@example.com", + deleted_on=deleted_time, + deleted_by="deleter@example.com", + chart_type=models.ChartTypeClass.BAR, + access=models.AccessLevelClass.PUBLIC, + owners=[CorpUserUrn("owner@example.com")], + tags=[TagUrn("test")], + ) + + # Verify audit stamps work alongside chart properties + assert c.created_at == created_time + assert c.created_by == "urn:li:corpuser:creator@example.com" + assert c.last_modified == 
modified_time + assert c.last_modified_by == "urn:li:corpuser:modifier@example.com" + assert c.deleted_on == deleted_time + assert c.deleted_by == "urn:li:corpuser:deleter@example.com" + + # Verify chart-specific properties still work + assert c.display_name == "Audit Integration Chart" + assert c.description == "Testing audit stamps with chart features" + assert c.chart_type == models.ChartTypeClass.BAR + assert c.access == models.AccessLevelClass.PUBLIC + assert c.owners is not None + assert c.tags is not None + + # Test that modifying audit stamps doesn't affect chart properties + new_modified_time = datetime(2023, 1, 4, 18, 0, 0) + c.set_last_modified(new_modified_time) + c.set_last_modified_by("new_modifier@example.com") + + assert c.last_modified == new_modified_time + assert c.last_modified_by == "urn:li:corpuser:new_modifier@example.com" + + # Chart properties should remain unchanged + assert c.display_name == "Audit Integration Chart" + assert c.chart_type == models.ChartTypeClass.BAR + assert c.access == models.AccessLevelClass.PUBLIC + + +def test_chart_audit_stamps_with_input_datasets() -> None: + """Test audit stamps with chart input dataset operations.""" + c = Chart( + platform="looker", + name="audit_datasets_chart", + created_at=datetime(2023, 1, 1, 10, 0, 0), + created_by="creator@example.com", + ) + + # Add input datasets + c.add_input_dataset("urn:li:dataset:(urn:li:dataPlatform:snowflake,table1,PROD)") + c.add_input_dataset("urn:li:dataset:(urn:li:dataPlatform:snowflake,table2,PROD)") + + # Verify audit stamps and input datasets work together + assert c.created_at == datetime(2023, 1, 1, 10, 0, 0) + assert c.created_by == "urn:li:corpuser:creator@example.com" + assert len(c.input_datasets) == 2 + + # Modify audit stamps + c.set_last_modified(datetime(2023, 1, 2, 12, 0, 0)) + c.set_last_modified_by("modifier@example.com") + + # Verify both audit stamps and input datasets are preserved + assert c.last_modified == datetime(2023, 1, 2, 12, 0, 0) + 
assert c.last_modified_by == "urn:li:corpuser:modifier@example.com" + assert len(c.input_datasets) == 2 + + # Remove a dataset + c.remove_input_dataset("urn:li:dataset:(urn:li:dataPlatform:snowflake,table1,PROD)") + + # Verify audit stamps are still intact + assert c.last_modified == datetime(2023, 1, 2, 12, 0, 0) + assert c.last_modified_by == "urn:li:corpuser:modifier@example.com" + assert len(c.input_datasets) == 1 + + +def test_chart_audit_stamps_golden() -> None: + """Test chart audit stamps with golden file comparison.""" + created_time = datetime(2023, 1, 1, 10, 0, 0) + modified_time = datetime(2023, 1, 2, 14, 30, 0) + deleted_time = datetime(2023, 1, 3, 16, 45, 0) + + c = Chart( + platform="looker", + name="audit_golden_chart", + display_name="Audit Golden Chart", + description="Testing chart audit stamps with golden files", + created_at=created_time, + created_by="creator@example.com", + last_modified=modified_time, + last_modified_by="modifier@example.com", + deleted_on=deleted_time, + deleted_by="deleter@example.com", + chart_type=models.ChartTypeClass.LINE, + access=models.AccessLevelClass.PRIVATE, + owners=[CorpUserUrn("owner@example.com")], + tags=[TagUrn("audit"), TagUrn("chart")], + custom_properties={"audit_test": "true", "chart_type": "line"}, + ) + + # Add some input datasets for comprehensive testing + c.add_input_dataset( + "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_data,PROD)" + ) + c.add_input_dataset("urn:li:dataset:(urn:li:dataPlatform:snowflake,user_data,PROD)") + + # Generate golden file for chart audit stamps functionality + assert_entity_golden( + c, + GOLDEN_DIR / "test_chart_audit_stamps_golden.json", + ["lastRefreshed"], # Exclude timestamp fields that might vary between test runs + ) diff --git a/metadata-ingestion/tests/unit/sdk_v2/test_dashboard.py b/metadata-ingestion/tests/unit/sdk_v2/test_dashboard.py index 11f03111fa..5cf0ba15e1 100644 --- a/metadata-ingestion/tests/unit/sdk_v2/test_dashboard.py +++ 
# Needed by test_dashboard_audit_stamps_edge_cases (UNKNOWN_USER, make_ts_millis).
from datahub.emitter import mce_builder


def test_dashboard_audit_stamps() -> None:
    """Test HasAuditStamps mixin functionality on Dashboard.

    Every audit-stamp keyword passed to the constructor must be readable back
    through the property of the same name. Actors given as plain strings are
    expected back as ``urn:li:corpuser:`` URN strings.
    """
    created_time = datetime(2023, 1, 1, 10, 0, 0)
    modified_time = datetime(2023, 1, 2, 14, 30, 0)
    deleted_time = datetime(2023, 1, 3, 16, 45, 0)

    # Test initialization with audit stamps
    d = Dashboard(
        platform="looker",
        name="audit_test_dashboard",
        created_at=created_time,
        created_by="creator@example.com",
        last_modified=modified_time,
        last_modified_by="modifier@example.com",
        deleted_on=deleted_time,
        deleted_by="deleter@example.com",
    )

    # Test created_at and created_by
    assert d.created_at == created_time
    assert d.created_by == "urn:li:corpuser:creator@example.com"

    # Test last_modified and last_modified_by
    assert d.last_modified == modified_time
    assert d.last_modified_by == "urn:li:corpuser:modifier@example.com"

    # Test deleted_on and deleted_by
    assert d.deleted_on == deleted_time
    assert d.deleted_by == "urn:li:corpuser:deleter@example.com"


def test_dashboard_audit_stamps_setters() -> None:
    """Test setting audit stamps after initialization.

    Covers the timestamp setters and the actor setters, the latter with both
    plain strings and ``CorpUserUrn`` objects.
    """
    d = Dashboard(
        platform="looker",
        name="audit_setter_test_dashboard",
    )

    # Initially all should be None
    assert d.created_at is None
    assert d.created_by is None
    assert d.last_modified is None
    assert d.last_modified_by is None
    assert d.deleted_on is None
    assert d.deleted_by is None

    # Test setting timestamps
    created_time = datetime(2023, 1, 1, 10, 0, 0)
    modified_time = datetime(2023, 1, 2, 14, 30, 0)
    deleted_time = datetime(2023, 1, 3, 16, 45, 0)

    d.set_created_at(created_time)
    d.set_last_modified(modified_time)
    d.set_deleted_on(deleted_time)

    assert d.created_at == created_time
    assert d.last_modified == modified_time
    assert d.deleted_on == deleted_time

    # Test setting actors with plain strings
    d.set_created_by("creator@example.com")
    d.set_last_modified_by("modifier@example.com")
    d.set_deleted_by("deleter@example.com")

    assert d.created_by == "urn:li:corpuser:creator@example.com"
    assert d.last_modified_by == "urn:li:corpuser:modifier@example.com"
    assert d.deleted_by == "urn:li:corpuser:deleter@example.com"

    # Test setting actors with CorpUserUrn objects; these replace the values
    # set from strings above.
    creator_urn = CorpUserUrn("creator_urn@example.com")
    modifier_urn = CorpUserUrn("modifier_urn@example.com")
    deleter_urn = CorpUserUrn("deleter_urn@example.com")

    d.set_created_by(creator_urn)
    d.set_last_modified_by(modifier_urn)
    d.set_deleted_by(deleter_urn)

    assert d.created_by == str(creator_urn)
    assert d.last_modified_by == str(modifier_urn)
    assert d.deleted_by == str(deleter_urn)


def test_dashboard_audit_stamps_edge_cases() -> None:
    """Test audit stamp defaults and timestamp conversion.

    A dashboard built without audit arguments must report ``None`` from every
    public accessor while its internal stamps hold the default actor and a
    zero time. After the timestamp setters are called, the internal stamps
    must hold the ``mce_builder.make_ts_millis`` value of the datetime.
    """
    d = Dashboard(
        platform="looker",
        name="audit_edge_case_dashboard",
    )

    # Nothing was set, so the public accessors report None.
    assert d.created_by is None
    assert d.last_modified_by is None
    assert d.deleted_by is None
    assert d.created_at is None
    assert d.last_modified is None
    assert d.deleted_on is None

    # Internally it should have the default values
    stamps = d._get_audit_stamps()
    assert stamps.created.actor == mce_builder.UNKNOWN_USER
    assert stamps.created.time == 0
    assert stamps.lastModified.actor == mce_builder.UNKNOWN_USER
    assert stamps.lastModified.time == 0
    assert stamps.deleted is None  # deleted has no default value as per the pdl

    # Test that timestamps are properly converted
    test_time = datetime(2023, 1, 1, 12, 0, 0)
    d.set_created_at(test_time)
    d.set_last_modified(test_time)
    d.set_deleted_on(test_time)

    # Verify the timestamps are stored correctly
    assert d.created_at == test_time
    assert d.last_modified == test_time
    assert d.deleted_on == test_time

    # Re-read the stamps after the mutation instead of reusing the earlier
    # object, so the check does not depend on it being updated in place.
    expected_millis = mce_builder.make_ts_millis(test_time)
    stamps = d._get_audit_stamps()
    assert stamps.created.time == expected_millis
    assert stamps.lastModified.time == expected_millis

    # deleted should not be None after setting deleted_on; the explicit
    # None check comes before its time is read.
    deleted_stamp = stamps.deleted
    assert deleted_stamp is not None
    assert deleted_stamp.time == expected_millis


def test_dashboard_audit_stamps_integration() -> None:
    """Test audit stamps integration with other dashboard functionality.

    Audit stamps are set together with display name, description, owners and
    tags, and the last-modified stamp is then updated, checking that the
    other properties are still readable and unchanged.
    """
    created_time = datetime(2023, 1, 1, 10, 0, 0)
    modified_time = datetime(2023, 1, 2, 14, 30, 0)

    d = Dashboard(
        platform="looker",
        name="audit_integration_dashboard",
        display_name="Audit Integration Test",
        description="Testing audit stamps with other features",
        created_at=created_time,
        created_by="creator@example.com",
        last_modified=modified_time,
        last_modified_by="modifier@example.com",
        owners=[CorpUserUrn("owner@example.com")],
        tags=[TagUrn("test")],
    )

    # Verify audit stamps work alongside other properties
    assert d.created_at == created_time
    assert d.created_by == "urn:li:corpuser:creator@example.com"
    assert d.last_modified == modified_time
    assert d.last_modified_by == "urn:li:corpuser:modifier@example.com"

    # Verify other properties still work
    assert d.display_name == "Audit Integration Test"
    assert d.description == "Testing audit stamps with other features"
    assert d.owners is not None
    assert d.tags is not None

    # Test that modifying audit stamps doesn't affect other properties
    new_modified_time = datetime(2023, 1, 3, 16, 0, 0)
    d.set_last_modified(new_modified_time)
    d.set_last_modified_by("new_modifier@example.com")

    assert d.last_modified == new_modified_time
    assert d.last_modified_by == "urn:li:corpuser:new_modifier@example.com"

    # Other properties should remain unchanged
    assert d.display_name == "Audit Integration Test"
    assert d.description == "Testing audit stamps with other features"
    assert d.owners is not None
    assert d.tags is not None


def test_dashboard_audit_stamps_golden() -> None:
    """Test audit stamps with golden file comparison.

    Builds a dashboard with all three audit stamps (created, last modified,
    deleted) plus owners, tags and custom properties, and compares it with
    the stored golden JSON.
    """
    created_time = datetime(2023, 1, 1, 10, 0, 0)
    modified_time = datetime(2023, 1, 2, 14, 30, 0)
    deleted_time = datetime(2023, 1, 3, 16, 45, 0)

    d = Dashboard(
        platform="looker",
        name="audit_golden_dashboard",
        display_name="Audit Golden Test",
        description="Testing audit stamps with golden files",
        created_at=created_time,
        created_by="creator@example.com",
        last_modified=modified_time,
        last_modified_by="modifier@example.com",
        deleted_on=deleted_time,
        deleted_by="deleter@example.com",
        owners=[CorpUserUrn("owner@example.com")],
        tags=[TagUrn("audit"), TagUrn("test")],
        custom_properties={"audit_test": "true"},
    )

    # Compare the dashboard against the golden file for audit stamps
    assert_entity_golden(
        d,
        _GOLDEN_DIR / "test_dashboard_audit_stamps_golden.json",
        ["lastRefreshed"],  # Exclude timestamp fields that might vary between test runs
    )