From 64bcc132a3d5f39d82553ca81857a8f73e88306b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90=E1=BA=B7ng=20Minh=20D=C5=A9ng?= Date: Thu, 13 Jul 2023 06:37:36 +0700 Subject: [PATCH] feat(ingest/airflow): able to set `platform_instance` in `Dataset` (#8313) Co-authored-by: Andrew Sikowitz --- metadata-ingestion/src/datahub_provider/entities.py | 9 ++++++++- .../example_dags/lineage_backend_demo.py | 9 +++++++-- .../example_dags/lineage_emission_dag.py | 12 +++++++++--- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub_provider/entities.py b/metadata-ingestion/src/datahub_provider/entities.py index d54db1c286..bfccc2f22e 100644 --- a/metadata-ingestion/src/datahub_provider/entities.py +++ b/metadata-ingestion/src/datahub_provider/entities.py @@ -1,4 +1,5 @@ from abc import abstractmethod +from typing import Optional import attr @@ -18,10 +19,16 @@ class Dataset(_Entity): platform: str name: str env: str = builder.DEFAULT_ENV + platform_instance: Optional[str] = None @property def urn(self): - return builder.make_dataset_urn(self.platform, self.name, self.env) + return builder.make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.name, + platform_instance=self.platform_instance, + env=self.env, + ) @attr.s(str=True) diff --git a/metadata-ingestion/src/datahub_provider/example_dags/lineage_backend_demo.py b/metadata-ingestion/src/datahub_provider/example_dags/lineage_backend_demo.py index e04159f388..95b594e405 100644 --- a/metadata-ingestion/src/datahub_provider/example_dags/lineage_backend_demo.py +++ b/metadata-ingestion/src/datahub_provider/example_dags/lineage_backend_demo.py @@ -34,8 +34,13 @@ with DAG( dag=dag, bash_command="echo 'This is where you might run your data tooling.'", inlets=[ - Dataset("snowflake", "mydb.schema.tableA"), - Dataset("snowflake", "mydb.schema.tableB", "DEV"), + Dataset(platform="snowflake", name="mydb.schema.tableA"), + Dataset(platform="snowflake", name="mydb.schema.tableB", env="DEV"), + Dataset( + platform="snowflake", + name="mydb.schema.tableC", + platform_instance="cloud", + ), # You can also put dataset URNs in the inlets/outlets lists. Urn( "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)" diff --git a/metadata-ingestion/src/datahub_provider/example_dags/lineage_emission_dag.py b/metadata-ingestion/src/datahub_provider/example_dags/lineage_emission_dag.py index c77ae3f00a..153464246c 100644 --- a/metadata-ingestion/src/datahub_provider/example_dags/lineage_emission_dag.py +++ b/metadata-ingestion/src/datahub_provider/example_dags/lineage_emission_dag.py @@ -57,11 +57,17 @@ with DAG( mces=[ builder.make_lineage_mce( upstream_urns=[ - builder.make_dataset_urn("snowflake", "mydb.schema.tableA"), - builder.make_dataset_urn("snowflake", "mydb.schema.tableB"), + builder.make_dataset_urn( + platform="snowflake", name="mydb.schema.tableA" + ), + builder.make_dataset_urn_with_platform_instance( + platform="snowflake", + name="mydb.schema.tableB", + platform_instance="cloud", + ), ], downstream_urn=builder.make_dataset_urn( - "snowflake", "mydb.schema.tableC" + platform="snowflake", name="mydb.schema.tableC", env="DEV" ), ) ],