# metadata-ingestion/examples/library/dataflow_comprehensive.py
from datetime import datetime, timezone

from datahub.metadata.urns import CorpGroupUrn, CorpUserUrn, GlossaryTermUrn, TagUrn
from datahub.sdk import DataFlow, DataHubClient

client = DataHubClient.from_env()

# Create a DataFlow with comprehensive metadata
dataflow = DataFlow(
    platform="airflow",
    name="daily_sales_aggregation",
    display_name="Daily Sales Aggregation Pipeline",
    platform_instance="PROD-US-EAST",
    env="PROD",
    description="Aggregates daily sales data from multiple sources and updates reporting tables",
    external_url="https://airflow.company.com/dags/daily_sales_aggregation",
    custom_properties={
        "team": "analytics",
        "schedule": "0 2 * * *",
        "sla_hours": "4",
        "priority": "high",
    },
    created=datetime(2024, 1, 15, tzinfo=timezone.utc),
    last_modified=datetime.now(timezone.utc),
    subtype="ETL",
    owners=[
        CorpUserUrn("jdoe"),
        CorpGroupUrn("data-engineering"),
    ],
    tags=[
        TagUrn(name="production"),
        TagUrn(name="sales"),
        TagUrn(name="critical"),
    ],
    terms=[
        GlossaryTermUrn("Classification.Confidential"),
    ],
    domain="urn:li:domain:sales",
)

# Upsert the DataFlow
client.entities.upsert(dataflow)

print(f"Created DataFlow: {dataflow.urn}")
print(f"Display Name: {dataflow.display_name}")
print(f"Description: {dataflow.description}")
print(f"External URL: {dataflow.external_url}")
print(f"Custom Properties: {dataflow.custom_properties}")
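
# Optional follow-up (a minimal sketch, not part of the original example): read the
# entity back to confirm the upsert landed. This assumes the SDK exposes
# client.entities.get(), which takes an entity URN and returns the hydrated entity;
# check the DataHub SDK docs for the exact accessor available in your version.
fetched = client.entities.get(dataflow.urn)
print(f"Fetched DataFlow description: {fetched.description}")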