# Source listing: datahub/metadata-ingestion/examples/library/dataflow_comprehensive.py (Python, 50 lines, 1.5 KiB)

# metadata-ingestion/examples/library/dataflow_comprehensive.py
from datetime import datetime, timezone
from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn, GlossaryTermUrn, TagUrn
from datahub.sdk import DataFlow, DataHubClient
# Example: define one Airflow DataFlow with every optional metadata field
# populated, write it to DataHub, then echo a few of its attributes.

# NOTE(review): from_env() presumably reads the server connection settings
# from the environment — confirm against the SDK documentation.
client = DataHubClient.from_env()

# Free-form key/value properties; every value is passed as a string.
flow_properties = {
    "team": "analytics",
    "schedule": "0 2 * * *",
    "sla_hours": "4",
    "priority": "high",
}

# Ownership is shared between one individual user and one group.
flow_owners = [CorpUserUrn("jdoe"), CorpGroupUrn("data-engineering")]

# One TagUrn per label, in the order listed.
flow_tags = [TagUrn(name=label) for label in ("production", "sales", "critical")]

flow_terms = [GlossaryTermUrn("Classification.Confidential")]

dataflow = DataFlow(
    # Identity of the flow.
    platform="airflow",
    name="daily_sales_aggregation",
    display_name="Daily Sales Aggregation Pipeline",
    platform_instance="PROD-US-EAST",
    env="PROD",
    # Descriptive attributes.
    description="Aggregates daily sales data from multiple sources and updates reporting tables",
    external_url="https://airflow.company.com/dags/daily_sales_aggregation",
    custom_properties=flow_properties,
    # Both timestamps are timezone-aware (UTC); last_modified is "now".
    created=datetime(2024, 1, 15, tzinfo=timezone.utc),
    last_modified=datetime.now(timezone.utc),
    # Classification and governance metadata.
    subtype="ETL",
    owners=flow_owners,
    tags=flow_tags,
    terms=flow_terms,
    domain="urn:li:domain:sales",
)

# Write the entity through the client's upsert call.
client.entities.upsert(dataflow)

# Report the URN and a handful of attributes read back from the object.
for summary_line in (
    f"Created DataFlow: {dataflow.urn}",
    f"Display Name: {dataflow.display_name}",
    f"Description: {dataflow.description}",
    f"External URL: {dataflow.external_url}",
    f"Custom Properties: {dataflow.custom_properties}",
):
    print(summary_line)