Mirror of https://github.com/datahub-project/datahub.git (synced 2025-08-23 08:38:02 +00:00)
fix(airflow): Stable tag order in DataFlow/DataJobs (#5696)
Commit: c66c263ceb
Parent: 1b626c7652
```diff
@@ -46,6 +46,7 @@ We recommend you use the lineage plugin if you are on Airflow version >= 2.0.2 o
 |---|---|---|
 | datahub.datahub_conn_id | datahub_rest_default | The name of the datahub connection you set in step 1. |
 | datahub.cluster | prod | The name of the Airflow cluster. |
+| capture_executions | false | If true, it captures task runs as DataHub DataProcessInstances. |
 | datahub.capture_ownership_info | true | If true, the owners field of the DAG will be captured as a DataHub corpuser. |
 | datahub.capture_tags_info | true | If true, the tags field of the DAG will be captured as DataHub tags. |
 | datahub.graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. |
```
```diff
@@ -101,6 +102,7 @@ If you are looking to run Airflow and DataHub using docker locally, follow the g
 datahub_kwargs = {
     "datahub_conn_id": "datahub_rest_default",
     "cluster": "prod",
+    "capture_executions": true,
     "capture_ownership_info": true,
     "capture_tags_info": true,
     "graceful_exceptions": true }
```
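For context, both docs hunks above describe the same stanza: these options sit under Airflow's `[lineage]` section in `airflow.cfg`. A minimal sketch of the full stanza, assuming the backend path published in the plugin docs (`datahub_provider.lineage.datahub.DatahubLineageBackend`); values are illustrative:

```ini
[lineage]
backend = datahub_provider.lineage.datahub.DatahubLineageBackend
datahub_kwargs = {
    "datahub_conn_id": "datahub_rest_default",
    "cluster": "prod",
    "capture_executions": true,
    "capture_ownership_info": true,
    "capture_tags_info": true,
    "graceful_exceptions": true }
```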
```diff
@@ -75,7 +75,7 @@ class DataFlow:
         tags = GlobalTagsClass(
             tags=[
                 TagAssociationClass(tag=builder.make_tag_urn(tag))
-                for tag in (self.tags or [])
+                for tag in (sorted(self.tags) or [])
             ]
         )
         return [tags]
```
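One review note on the replacement line: `x or []` tolerated `tags=None`, while `sorted(x) or []` would raise on it, so the new form assumes `tags` defaults to an empty collection rather than `None`. A quick illustration:

```python
tags = None
print(tags or [])          # [] -- the old guard tolerated None
print(sorted(tags or []))  # [] -- a None-safe way to get the same ordering
# sorted(tags) or []       # would raise TypeError: 'NoneType' object is not iterable
```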
```diff
@@ -103,7 +103,7 @@ class DataJob:
         tags = GlobalTagsClass(
             tags=[
                 TagAssociationClass(tag=builder.make_tag_urn(tag))
-                for tag in (self.tags or [])
+                for tag in (sorted(self.tags) or [])
             ]
         )
         return [tags]
```
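The motivation for `sorted()` in both classes: DAG tags arrive as an unordered collection, so the emitted `GlobalTagsClass` aspect could list the same tags in a different order on each run, producing spurious metadata diffs. A minimal, self-contained sketch (the local `make_tag_urn` stands in for `builder.make_tag_urn`):

```python
def make_tag_urn(tag: str) -> str:
    # Stand-in for datahub's builder.make_tag_urn
    return f"urn:li:tag:{tag}"

def tag_urns(tags) -> list:
    # sorted() turns any iterable (set, list, tuple) into a stable list,
    # so the emitted aspect is identical across runs for the same tag set
    return [make_tag_urn(tag) for tag in sorted(tags or [])]

assert tag_urns({"beta", "alpha"}) == tag_urns(["alpha", "beta"])
print(tag_urns({"beta", "alpha"}))  # ['urn:li:tag:alpha', 'urn:li:tag:beta']
```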
```diff
@@ -37,11 +37,9 @@ with DAG(
         task_id="run_data_task",
         dag=dag,
         bash_command="echo 'This is where you might run your data tooling.'",
-        inlets={
-            "datasets": [
-                Dataset("snowflake", "mydb.schema.tableA"),
-                Dataset("snowflake", "mydb.schema.tableB"),
-            ],
-        },
-        outlets={"datasets": [Dataset("snowflake", "mydb.schema.tableC")]},
+        inlets=[
+            Dataset("snowflake", "mydb.schema.tableA"),
+            Dataset("snowflake", "mydb.schema.tableB"),
+        ],
+        outlets=[Dataset("snowflake", "mydb.schema.tableC")],
     )
```
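To see the new list-style arguments in a runnable context, here is a hedged sketch of a complete DAG around the task above; the DAG id and scheduling arguments are illustrative, and it assumes Airflow 2.x with the `datahub_provider` package installed:

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator
from datahub_provider.entities import Dataset

with DAG(
    dag_id="datahub_lineage_demo",  # hypothetical id for this sketch
    start_date=datetime(2022, 1, 1),
    schedule_interval=None,
    catchup=False,
) as dag:
    BashOperator(
        task_id="run_data_task",
        bash_command="echo 'This is where you might run your data tooling.'",
        # inlets/outlets are now plain lists of Dataset entities
        inlets=[
            Dataset("snowflake", "mydb.schema.tableA"),
            Dataset("snowflake", "mydb.schema.tableB"),
        ],
        outlets=[Dataset("snowflake", "mydb.schema.tableC")],
    )
```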
```diff
@@ -30,13 +30,11 @@ default_args = {
 )
 def datahub_lineage_backend_taskflow_demo():
     @task(
-        inlets={
-            "datasets": [
-                Dataset("snowflake", "mydb.schema.tableA"),
-                Dataset("snowflake", "mydb.schema.tableB"),
-            ],
-        },
-        outlets={"datasets": [Dataset("snowflake", "mydb.schema.tableC")]},
+        inlets=[
+            Dataset("snowflake", "mydb.schema.tableA"),
+            Dataset("snowflake", "mydb.schema.tableB"),
+        ],
+        outlets=[Dataset("snowflake", "mydb.schema.tableC")],
     )
     def run_data_task():
         # This is where you might run your data tooling.
```
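And the equivalent hedged sketch for the TaskFlow form above, with assumed boilerplate around the names from the example file:

```python
from datetime import datetime

from airflow.decorators import dag, task
from datahub_provider.entities import Dataset

@dag(start_date=datetime(2022, 1, 1), schedule_interval=None, catchup=False)
def datahub_lineage_backend_taskflow_demo():
    # Extra @task kwargs are forwarded to the underlying operator,
    # so inlets/outlets work just as on BashOperator above
    @task(
        inlets=[
            Dataset("snowflake", "mydb.schema.tableA"),
            Dataset("snowflake", "mydb.schema.tableB"),
        ],
        outlets=[Dataset("snowflake", "mydb.schema.tableC")],
    )
    def run_data_task():
        # This is where you might run your data tooling.
        pass

    run_data_task()

datahub_lineage_backend_taskflow_demo()
```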
```diff
@@ -120,6 +120,8 @@ class DatahubLineageBackend(LineageBackend):
         except Exception as e:
             if config.graceful_exceptions:
                 operator.log.error(e)
-                operator.log.info("Supressing error because graceful_exceptions is set")
+                operator.log.info(
+                    "Suppressing error because graceful_exceptions is set"
+                )
             else:
                 raise
```
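For context, the hunk above only rewraps the log call (and fixes the "Supressing" typo); the surrounding pattern is log-and-continue when `graceful_exceptions` is set, re-raise otherwise, so lineage emission failures never fail the user's task. A minimal stand-alone sketch of that pattern (names are stand-ins, not the backend's API):

```python
import logging

log = logging.getLogger(__name__)

def emit_lineage_safely(emit, graceful_exceptions: bool = True) -> None:
    # Stand-in for the backend's error handling around lineage emission
    try:
        emit()
    except Exception as e:
        if graceful_exceptions:
            log.error(e)
            log.info("Suppressing error because graceful_exceptions is set")
        else:
            raise

def failing_emit():
    raise RuntimeError("emitter unavailable")

emit_lineage_safely(failing_emit)  # logged and suppressed
# emit_lineage_safely(failing_emit, graceful_exceptions=False)  # would re-raise
```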