datahub/metadata-ingestion/examples/library/create_datajob_with_flow_urn.py
2025-06-27 15:28:20 +09:00

25 lines
619 B
Python

from datahub.metadata.urns import DataFlowUrn, DatasetUrn
from datahub.sdk import DataHubClient, DataJob
# Connect to DataHub using configuration discovered from the environment
# (e.g. DATAHUB_GMS_URL / token environment variables).
client = DataHubClient.from_env()

# The job is attached to an existing flow by URN. Per the original example's
# note, the job inherits platform details from the flow; platform_instance is
# still passed explicitly below — presumably because the URN alone does not
# carry it (NOTE(review): confirm against the DataJob constructor docs).
flow = DataFlowUrn(
    orchestrator="airflow",
    flow_id="example_dag",
    cluster="PROD",
)

# Lineage endpoints: one upstream (input) and one downstream (output) dataset.
upstream = DatasetUrn(platform="hdfs", name="dataset1", env="PROD")
downstream = DatasetUrn(platform="hdfs", name="dataset2", env="PROD")

datajob = DataJob(
    name="example_datajob",
    flow_urn=flow,
    platform_instance="PROD",
    inlets=[upstream],
    outlets=[downstream],
)

# Create the job if it does not exist, or update it in place if it does.
client.entities.upsert(datajob)