# Mirror of https://github.com/datahub-project/datahub.git
# Synced 2025-07-12 19:49:36 +00:00 (25 lines, 619 B, Python)
from datahub.metadata.urns import DataFlowUrn, DatasetUrn
from datahub.sdk import DataHubClient, DataJob

# Example: define a DataJob attached to an existing flow (referenced by URN),
# declare its input and output datasets, and upsert it through the client.

# NOTE(review): presumably from_env() picks up the server connection settings
# from the environment -- confirm against the SDK documentation.
client = DataHubClient.from_env()

# datajob will inherit the platform and platform instance from the flow
# NOTE(review): platform_instance is nevertheless passed explicitly below;
# confirm whether it is required when only a flow URN (not a DataFlow object)
# is supplied.
datajob = DataJob(
    name="example_datajob",
    # The parent flow is identified by URN only; no DataFlow object is built.
    flow_urn=DataFlowUrn(
        orchestrator="airflow",
        flow_id="example_dag",
        cluster="PROD",
    ),
    platform_instance="PROD",
    # Datasets the job reads from.
    inlets=[
        DatasetUrn(platform="hdfs", name="dataset1", env="PROD"),
    ],
    # Datasets the job writes to.
    outlets=[
        DatasetUrn(platform="hdfs", name="dataset2", env="PROD"),
    ],
)

# Send the job definition to DataHub.
client.entities.upsert(datajob)