feat(ingestion/datahub): Improve system metadata handling in datahub source (#14643)

This commit is contained in:
skrydal 2025-09-04 16:40:56 +02:00 committed by GitHub
parent 510b2a4082
commit f6f939d40f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 1 deletions

View File

@@ -129,6 +129,10 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
description="Timeout for each query in seconds. ",
)
preserve_system_metadata: bool = Field(
default=True, description="Copy system metadata from the source system"
)
@root_validator(skip_on_failure=True)
def check_ingesting_data(cls, values):
if (

View File

@@ -380,7 +380,12 @@ class DataHubDatabaseReader:
json_metadata = post_json_transform(
json.loads(row["systemmetadata"] or "{}")
)
system_metadata = None
if self.config.preserve_system_metadata:
system_metadata = SystemMetadataClass.from_obj(json_metadata)
if system_metadata.properties:
is_no_op = system_metadata.properties.pop("isNoOp", None)
logger.debug(f"Removed potential value for is_no_op={is_no_op}")
return MetadataChangeProposalWrapper(
entityUrn=row["urn"],
aspect=ASPECT_MAP[row["aspect"]].from_obj(json_aspect),