From f6f939d40f4c71216b83f37ec6f76f712ef614d3 Mon Sep 17 00:00:00 2001 From: skrydal Date: Thu, 4 Sep 2025 16:40:56 +0200 Subject: [PATCH] feat(ingestion/datahub): Improve system metadata handling in datahub source (#14643) --- .../src/datahub/ingestion/source/datahub/config.py | 4 ++++ .../ingestion/source/datahub/datahub_database_reader.py | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index d5f41cc2eb..f77065e1e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -129,6 +129,10 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): description="Timeout for each query in seconds. ", ) + preserve_system_metadata: bool = Field( + default=True, description="Copy system metadata from the source system" + ) + @root_validator(skip_on_failure=True) def check_ingesting_data(cls, values): if ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py index 01ebe1c696..0da0ff7b56 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_database_reader.py @@ -380,7 +380,12 @@ class DataHubDatabaseReader: json_metadata = post_json_transform( json.loads(row["systemmetadata"] or "{}") ) - system_metadata = SystemMetadataClass.from_obj(json_metadata) + system_metadata = None + if self.config.preserve_system_metadata: + system_metadata = SystemMetadataClass.from_obj(json_metadata) + if system_metadata.properties: + is_no_op = system_metadata.properties.pop("isNoOp", None) + logger.debug(f"Removed potential value for is_no_op={is_no_op}") return MetadataChangeProposalWrapper( entityUrn=row["urn"], aspect=ASPECT_MAP[row["aspect"]].from_obj(json_aspect),