"""Create a DataFlow and a child DataJob with dataset lineage in DataHub.

Connects to a DataHub instance using environment configuration
(``DataHubClient.from_env()`` reads server URL/token from env vars or
~/.datahubenv), builds an Airflow-style flow plus one job with input and
output datasets, and upserts both entities.
"""

from datahub.metadata.urns import DatasetUrn, TagUrn
from datahub.sdk import DataFlow, DataHubClient, DataJob

client = DataHubClient.from_env()

# datajob will inherit the platform and platform instance from the flow
dataflow = DataFlow(
    platform="airflow",
    name="example_dag",
    platform_instance="PROD",
    description="example dataflow",
    tags=[TagUrn(name="tag1"), TagUrn(name="tag2")],
)

datajob = DataJob(
    name="example_datajob",
    flow=dataflow,
    # Lineage: this job reads dataset1 and writes dataset2.
    inlets=[
        DatasetUrn(platform="hdfs", name="dataset1", env="PROD"),
    ],
    outlets=[
        DatasetUrn(platform="hdfs", name="dataset2", env="PROD"),
    ],
)

# Upsert the flow before the job so the job's parent flow exists server-side.
client.entities.upsert(dataflow)
client.entities.upsert(datajob)