mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-11 17:03:33 +00:00
120 lines
3.3 KiB
Python
120 lines
3.3 KiB
Python
import datahub.emitter.mce_builder as builder
|
|
import datahub.metadata.schema_classes as models
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
|
|
gms_endpoint = "http://localhost:8080"
|
|
emitter = DatahubRestEmitter(gms_server=gms_endpoint, extra_headers={})
|
|
|
|
# Step 1: Create the source dataset for lineage
|
|
dataset_urn = builder.make_dataset_urn(
|
|
name="customer_transactions", platform="snowflake", env="PROD"
|
|
)
|
|
|
|
# Step 2: Create the primary key entity
|
|
primary_key_urn = builder.make_ml_primary_key_urn(
|
|
feature_table_name="transaction_features",
|
|
primary_key_name="transaction_id",
|
|
)
|
|
|
|
primary_key_properties = models.MLPrimaryKeyPropertiesClass(
|
|
description="Unique identifier for each transaction",
|
|
dataType="TEXT",
|
|
sources=[dataset_urn],
|
|
)
|
|
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=primary_key_urn,
|
|
aspect=primary_key_properties,
|
|
)
|
|
)
|
|
|
|
# Step 3: Create the feature entities
|
|
feature_1_urn = builder.make_ml_feature_urn(
|
|
feature_name="transaction_amount",
|
|
feature_table_name="transaction_features",
|
|
)
|
|
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=feature_1_urn,
|
|
aspect=models.MLFeaturePropertiesClass(
|
|
description="Total amount of the transaction in USD",
|
|
dataType="CONTINUOUS",
|
|
sources=[dataset_urn],
|
|
),
|
|
)
|
|
)
|
|
|
|
feature_2_urn = builder.make_ml_feature_urn(
|
|
feature_name="is_fraud",
|
|
feature_table_name="transaction_features",
|
|
)
|
|
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=feature_2_urn,
|
|
aspect=models.MLFeaturePropertiesClass(
|
|
description="Binary indicator of fraudulent transaction",
|
|
dataType="BINARY",
|
|
sources=[dataset_urn],
|
|
),
|
|
)
|
|
)
|
|
|
|
# Step 4: Create the feature table with all properties
|
|
feature_table_urn = builder.make_ml_feature_table_urn(
|
|
feature_table_name="transaction_features", platform="feast"
|
|
)
|
|
|
|
feature_table_properties = models.MLFeatureTablePropertiesClass(
|
|
description="Real-time transaction features for fraud detection models",
|
|
mlFeatures=[feature_1_urn, feature_2_urn],
|
|
mlPrimaryKeys=[primary_key_urn],
|
|
customProperties={
|
|
"update_frequency": "real-time",
|
|
"team": "fraud-detection",
|
|
"critical": "true",
|
|
},
|
|
)
|
|
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=feature_table_urn,
|
|
aspect=feature_table_properties,
|
|
)
|
|
)
|
|
|
|
# Step 5: Add tags for categorization
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=feature_table_urn,
|
|
aspect=models.GlobalTagsClass(
|
|
tags=[
|
|
models.TagAssociationClass(tag=builder.make_tag_urn("Fraud Detection")),
|
|
models.TagAssociationClass(
|
|
tag=builder.make_tag_urn("Real-time Features")
|
|
),
|
|
]
|
|
),
|
|
)
|
|
)
|
|
|
|
# Step 6: Add ownership
|
|
emitter.emit(
|
|
MetadataChangeProposalWrapper(
|
|
entityUrn=feature_table_urn,
|
|
aspect=models.OwnershipClass(
|
|
owners=[
|
|
models.OwnerClass(
|
|
owner=builder.make_user_urn("data_science_team"),
|
|
type=models.OwnershipTypeClass.DATAOWNER,
|
|
)
|
|
]
|
|
),
|
|
)
|
|
)
|
|
|
|
print(f"Successfully created feature table: {feature_table_urn}")
|