# File info: 29 lines, 717 B, Python

from datahub.metadata.urns import DatasetUrn, TagUrn
from datahub.sdk import DataFlow, DataHubClient, DataJob
# Build the client through the SDK's `from_env` constructor.
client = DataHubClient.from_env()

# Parent flow. The job defined below is attached to it through `flow=`, so the
# job inherits the platform and platform instance from this flow.
flow_tags = [TagUrn(name="tag1"), TagUrn(name="tag2")]
dataflow = DataFlow(
    platform="airflow",
    name="example_dag",
    platform_instance="PROD",
    description="example dataflow",
    tags=flow_tags,
)

# Datasets the job lists as its inlet and outlet, respectively.
inlet_dataset = DatasetUrn(platform="hdfs", name="dataset1", env="PROD")
outlet_dataset = DatasetUrn(platform="hdfs", name="dataset2", env="PROD")

datajob = DataJob(
    name="example_datajob",
    flow=dataflow,
    inlets=[inlet_dataset],
    outlets=[outlet_dataset],
)

# Upsert the flow first, then the job.
for entity in (dataflow, datajob):
    client.entities.upsert(entity)