datahub/metadata-ingestion/examples/library/create_mlprimarykey.py

35 lines
1.2 KiB
Python

import datahub.emitter.mce_builder as builder
import datahub.metadata.schema_classes as models
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
# Example script: describe an ML primary key and send it to DataHub over REST.

# REST emitter pointed at the DataHub server on localhost.
emitter = DatahubRestEmitter(
    gms_server="http://localhost:8080",
    extra_headers={},
)

# URN of the upstream dataset that the primary key is sourced from.
dataset_urn = builder.make_dataset_urn(
    name="fct_users_created",
    platform="hive",
    env="PROD",
)

# URN of the "user_id" primary key within the "users_feature_table" feature table.
primary_key_urn = builder.make_ml_primary_key_urn(
    feature_table_name="users_feature_table",
    primary_key_name="user_id",
)

# Properties aspect for the primary key.
# Attaching the dataset as a source creates lineage between the primary key
# and the upstream dataset; this is how lineage between the data warehouse
# and the machine learning ecosystem is established.
primary_key_properties = models.MLPrimaryKeyPropertiesClass(
    description="Represents the id of the user the other features relate to.",
    sources=[dataset_urn],
    dataType="TEXT",
)

# Wrap the aspect in an UPSERT change proposal for the primary key entity.
metadata_change_proposal = MetadataChangeProposalWrapper(
    entityType="mlPrimaryKey",
    changeType=models.ChangeTypeClass.UPSERT,
    entityUrn=primary_key_urn,
    aspectName="mlPrimaryKeyProperties",
    aspect=primary_key_properties,
)

# Send the proposal to DataHub.
emitter.emit(metadata_change_proposal)