37 lines
1.3 KiB
Python

import os
import datahub.emitter.mce_builder as builder
import datahub.metadata.schema_classes as models
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter
# Create an emitter to DataHub over REST
gms_server = os.getenv("DATAHUB_GMS_URL", "http://localhost:8080")
token = os.getenv("DATAHUB_GMS_TOKEN")
emitter = DatahubRestEmitter(gms_server=gms_server, token=token)
dataset_urn = builder.make_dataset_urn(
name="fct_users_created", platform="hive", env="PROD"
)
feature_urn = builder.make_ml_feature_urn(
feature_table_name="users_feature_table",
feature_name="user_signup_date",
)
# Create feature
metadata_change_proposal = MetadataChangeProposalWrapper(
entityUrn=feature_urn,
aspect=models.MLFeaturePropertiesClass(
description="Represents the date the user created their account",
# attaching a source to a feature creates lineage between the feature
# and the upstream dataset. This is how lineage between your data warehouse
# and machine learning ecosystem is established.
sources=[dataset_urn],
dataType="TIME",
),
)
# Emit metadata!
emitter.emit_mcp(metadata_change_proposal)
print(f"Created ML feature: {feature_urn}")