99 lines
3.6 KiB
Python
Raw Normal View History

import logging
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
# Imports for metadata model classes
from datahub.metadata.schema_classes import (
PropertyValueClass,
StructuredPropertyDefinitionClass,
)
from datahub.metadata.urns import StructuredPropertyUrn
# Route INFO-and-above records through the default logging configuration,
# then take a logger named after this module.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# REST emitter targeting the GMS endpoint at http://localhost:8080.
rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
# First, define an open-ended structured property that accepts a single text
# value and may be attached to datasets and containers.
text_property_urn = StructuredPropertyUrn("io.acryl.openTextProperty")
text_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.openTextProperty",
    displayName="Open Text Property",
    valueType="urn:li:dataType:datahub.string",
    cardinality="SINGLE",
    entityTypes=[
        "urn:li:entityType:datahub.dataset",
        "urn:li:entityType:datahub.container",
    ],
    description="This structured property allows a single open ended response as a value",
    immutable=False,
)

# Wrap the definition aspect in a change proposal keyed by the property's URN
# and send it through the REST emitter.
event_prop_1: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityUrn=str(text_property_urn),
    aspect=text_property_definition,
)
rest_emitter.emit(event_prop_1)
# Second property: a multi-valued, URN-typed "Data Steward" field for datasets.
# A property like this could be used to reference other users or groups in
# DataHub.
urn_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.dataSteward")

# Type qualifier that ensures only user or group URNs can be assigned as values.
steward_value_types = {
    "allowedTypes": [
        "urn:li:entityType:datahub.corpuser",
        "urn:li:entityType:datahub.corpGroup",
    ]
}

urn_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.dataManagement.dataSteward",
    displayName="Data Steward",
    description="The data stewards of this asset are in charge of ensuring data cleanliness and governance",
    valueType="urn:li:dataType:datahub.urn",
    typeQualifier=steward_value_types,
    cardinality="MULTIPLE",
    entityTypes=["urn:li:entityType:datahub.dataset"],
    immutable=True,
)

# Package the definition as a change proposal for the property's URN and emit it.
event_prop_2: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    aspect=urn_property_definition,
    entityUrn=str(urn_property_urn),
)
rest_emitter.emit(event_prop_2)
# Third property: a single-select numeric field limited to a few allowed options.
number_property_urn = StructuredPropertyUrn("io.acryl.dataManagement.replicationSLA")

# (value, description) pairs for each permitted choice.
sla_choices = [
    (30, "30 days, usually reserved for datasets that are ephemeral and contain pii"),
    (90, "Use this for datasets that drive monthly reporting but contain pii"),
    (365, "Use this for non-sensitive data that can be retained for longer"),
]

# NOTE(review): the display name says "Retention Time" while the qualified name
# and description talk about a replication SLA -- confirm which is intended.
number_property_definition = StructuredPropertyDefinitionClass(
    qualifiedName="io.acryl.dataManagement.replicationSLA",
    displayName="Retention Time",
    description="SLA for how long data can be delayed before replicating to the destination cluster",
    valueType="urn:li:dataType:datahub.number",
    cardinality="SINGLE",
    entityTypes=[
        "urn:li:entityType:datahub.dataset",
        "urn:li:entityType:datahub.dataFlow",
    ],
    immutable=False,
    allowedValues=[
        PropertyValueClass(value=amount, description=text)
        for amount, text in sla_choices
    ],
)

# Package the definition as a change proposal for the property's URN and emit it.
event_prop_3: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    aspect=number_property_definition,
    entityUrn=str(number_property_urn),
)
rest_emitter.emit(event_prop_3)