2025-05-09 10:20:48 +09:00
|
|
|
from datahub.metadata.urns import DataFlowUrn, DataJobUrn, DatasetUrn
|
2025-06-12 14:00:26 +09:00
|
|
|
from datahub.sdk import DataHubClient
|
2025-05-09 10:20:48 +09:00
|
|
|
|
|
|
|
|
# Example: register lineage from a data job to a dataset.
#
# The client is obtained through DataHubClient.from_env(); connection details
# are presumably taken from the environment -- confirm against the SDK docs.
client = DataHubClient.from_env()

# URN of the flow that owns the job below.
dataflow_urn = DataFlowUrn(
    cluster="PROD",
    flow_id="data_pipeline",
    orchestrator="airflow",
)

# Endpoints of the lineage edge: the job is passed as the upstream side and
# the dataset as the downstream side.
upstream_job = DataJobUrn(job_id="data_job_1", flow=dataflow_urn)
downstream_dataset = DatasetUrn(name="raw_data", platform="postgres")

client.lineage.add_lineage(
    upstream=upstream_job,
    downstream=downstream_dataset,
)
|