diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md
index 40895ed2c3..4a35989891 100644
--- a/metadata-models-custom/README.md
+++ b/metadata-models-custom/README.md
@@ -129,6 +129,15 @@ results in
 Update succeeded with status 200
 ```
 
+The `scripts/insert_custom_aspect.py` script shows you how to accomplish the same using the Python SDK. Note that we are just using a raw dictionary here to represent the `dq_rule` aspect and not a strongly-typed class.
+```console
+cd scripts
+python3 insert_custom_aspect.py
+```
+results in
+```console
+Successfully wrote to DataHub
+```
 
 ## Advanced Guide
 
diff --git a/metadata-models-custom/scripts/insert_custom_aspect.py b/metadata-models-custom/scripts/insert_custom_aspect.py
new file mode 100644
index 0000000000..707fc71add
--- /dev/null
+++ b/metadata-models-custom/scripts/insert_custom_aspect.py
@@ -0,0 +1,59 @@
+"""Emit a custom `customDataQualityRules` aspect to DataHub via the Python SDK.
+
+The aspect payload is a raw dictionary serialized to JSON rather than a
+strongly-typed class. A success message is printed once the proposal has been
+emitted; on failure an error message is printed and the exception is re-raised.
+"""
+
+import json
+
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+from datahub.metadata.schema_classes import (
+    ChangeTypeClass,
+    GenericAspectClass,
+    MetadataChangeProposalClass,
+)
+
+# Example payload for the custom aspect: one dataset-level and one field-level rule.
+dq_aspect = {
+    "rules": [
+        {
+            "field": "my_event_data",
+            "isFieldLevel": False,
+            "type": "isNull",
+            "checkDefinition": "n/a",
+            "url": "https://github.com/datahub-project/datahub/blob/master/checks/nonNull.sql",
+        },
+        {
+            "field": "timestamp",
+            "isFieldLevel": True,
+            "type": "increasing",
+            "checkDefinition": "n/a",
+            "url": "https://github.com/datahub-project/datahub/blob/master/checks/increasing.sql",
+        },
+    ]
+}
+
+emitter: DatahubRestEmitter = DatahubRestEmitter(gms_server="http://localhost:8080")
+
+dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)"
+# The payload is sent as UTF-8 encoded JSON bytes wrapped in GenericAspectClass
+# instead of a strongly-typed aspect class.
+mcp_raw: MetadataChangeProposalClass = MetadataChangeProposalClass(
+    entityType="dataset",
+    entityUrn=dataset_urn,
+    changeType=ChangeTypeClass.UPSERT,
+    aspectName="customDataQualityRules",
+    aspect=GenericAspectClass(
+        contentType="application/json",
+        value=json.dumps(dq_aspect).encode("utf-8"),
+    ),
+)
+
+try:
+    emitter.emit(mcp_raw)
+    print("Successfully wrote to DataHub")
+except Exception:
+    print("Failed to write to DataHub")
+    # Bare `raise` re-raises the active exception with its original traceback.
+    raise