2023-05-17 10:21:39 +09:00
|
|
|
import datahub.emitter.mce_builder as builder
|
|
|
|
import datahub.metadata.schema_classes as models
|
|
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
|
|
|
|
|
|
# REST emitter pointed at a locally running DataHub GMS endpoint.
emitter = DatahubRestEmitter(
    gms_server="http://localhost:8080",
    extra_headers={},
)

# URN of the upstream dataset that the feature is derived from.
dataset_urn = builder.make_dataset_urn(
    name="fct_users_created",
    platform="hive",
    env="PROD",
)

# URN of the ML feature being described, scoped to its feature table.
feature_urn = builder.make_ml_feature_urn(
    feature_table_name="users_feature_table",
    feature_name="user_signup_date",
)

# Properties aspect for the feature. Listing the dataset under `sources`
# is what creates lineage between the feature and the upstream dataset,
# i.e. between the data warehouse and the machine learning ecosystem.
feature_properties = models.MLFeaturePropertiesClass(
    description="Represents the date the user created their account",
    sources=[dataset_urn],
    dataType="TIME",
)

# Wrap the aspect in a change proposal targeting the feature entity.
metadata_change_proposal = MetadataChangeProposalWrapper(
    entityType="mlFeature",
    changeType=models.ChangeTypeClass.UPSERT,
    entityUrn=feature_urn,
    aspectName="mlFeatureProperties",
    aspect=feature_properties,
)

# Send the proposal to DataHub.
emitter.emit(metadata_change_proposal)
|