mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(airflow-plugin): ability to disable datajob lineage (#13187)
This commit is contained in:
parent
5ee0b66920
commit
60b769fbf6
@ -79,6 +79,7 @@ enabled = True # default
|
|||||||
| disable_openlineage_plugin | true | Disable the OpenLineage plugin to avoid duplicative processing. |
|
| disable_openlineage_plugin | true | Disable the OpenLineage plugin to avoid duplicative processing. |
|
||||||
| log_level | _no change_ | [debug] Set the log level for the plugin. |
|
| log_level | _no change_ | [debug] Set the log level for the plugin. |
|
||||||
| debug_emitter | false | [debug] If true, the plugin will log the emitted events. |
|
| debug_emitter | false | [debug] If true, the plugin will log the emitted events. |
|
||||||
|
| enable_datajob_lineage | true | If true, the plugin will emit input/output lineage for DataJobs. |
|
||||||
|
|
||||||
## DataHub Plugin v1
|
## DataHub Plugin v1
|
||||||
|
|
||||||
@ -148,6 +149,7 @@ conn_id = datahub_rest_default # or datahub_kafka_default
|
|||||||
| |
|
| |
|
||||||
| graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. |
|
| graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. |
|
||||||
| dag_filter_str | { "allow": [".*"] } | AllowDenyPattern value, in the form of a JSON string, used to filter which DAGs are ingested. |
|
| dag_filter_str | { "allow": [".*"] } | AllowDenyPattern value, in the form of a JSON string, used to filter which DAGs are ingested. |
|
||||||
|
| enable_datajob_lineage | true | If true, the plugin will emit input/output lineage for DataJobs. |
|
||||||
|
|
||||||
#### Validate that the plugin is working
|
#### Validate that the plugin is working
|
||||||
|
|
||||||
|
@ -67,6 +67,9 @@ class DatahubLineageConfig(ConfigModel):
|
|||||||
# Makes extraction of jinja-templated fields more accurate.
|
# Makes extraction of jinja-templated fields more accurate.
|
||||||
render_templates: bool = True
|
render_templates: bool = True
|
||||||
|
|
||||||
|
# If true, input/output lineage is emitted for DataJobs; if false, it is not.
|
||||||
|
enable_datajob_lineage: bool = True
|
||||||
|
|
||||||
dag_filter_pattern: AllowDenyPattern = Field(
|
dag_filter_pattern: AllowDenyPattern = Field(
|
||||||
default=AllowDenyPattern.allow_all(),
|
default=AllowDenyPattern.allow_all(),
|
||||||
description="regex patterns for DAGs to ingest",
|
description="regex patterns for DAGs to ingest",
|
||||||
@ -127,6 +130,7 @@ def get_lineage_config() -> DatahubLineageConfig:
|
|||||||
dag_filter_pattern = AllowDenyPattern.parse_raw(
|
dag_filter_pattern = AllowDenyPattern.parse_raw(
|
||||||
conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}')
|
conf.get("datahub", "dag_filter_str", fallback='{"allow": [".*"]}')
|
||||||
)
|
)
|
||||||
|
enable_lineage = conf.get("datahub", "enable_datajob_lineage", fallback=True)
|
||||||
|
|
||||||
return DatahubLineageConfig(
|
return DatahubLineageConfig(
|
||||||
enabled=enabled,
|
enabled=enabled,
|
||||||
@ -145,4 +149,5 @@ def get_lineage_config() -> DatahubLineageConfig:
|
|||||||
datajob_url_link=datajob_url_link,
|
datajob_url_link=datajob_url_link,
|
||||||
render_templates=render_templates,
|
render_templates=render_templates,
|
||||||
dag_filter_pattern=dag_filter_pattern,
|
dag_filter_pattern=dag_filter_pattern,
|
||||||
|
enable_datajob_lineage=enable_lineage,
|
||||||
)
|
)
|
||||||
|
@ -260,6 +260,9 @@ class DataHubListener:
|
|||||||
routine is also responsible for converting the lineage to DataHub URNs.
|
routine is also responsible for converting the lineage to DataHub URNs.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if not self.config.enable_datajob_lineage:
|
||||||
|
return
|
||||||
|
|
||||||
input_urns: List[str] = []
|
input_urns: List[str] = []
|
||||||
output_urns: List[str] = []
|
output_urns: List[str] = []
|
||||||
fine_grained_lineages: List[FineGrainedLineageClass] = []
|
fine_grained_lineages: List[FineGrainedLineageClass] = []
|
||||||
@ -450,7 +453,8 @@ class DataHubListener:
|
|||||||
# TODO: Add handling for Airflow mapped tasks using task_instance.map_index
|
# TODO: Add handling for Airflow mapped tasks using task_instance.map_index
|
||||||
|
|
||||||
for mcp in datajob.generate_mcp(
|
for mcp in datajob.generate_mcp(
|
||||||
materialize_iolets=self.config.materialize_iolets
|
generate_lineage=self.config.enable_datajob_lineage,
|
||||||
|
materialize_iolets=self.config.materialize_iolets,
|
||||||
):
|
):
|
||||||
self.emitter.emit(mcp, self._make_emit_callback())
|
self.emitter.emit(mcp, self._make_emit_callback())
|
||||||
logger.debug(f"Emitted DataHub Datajob start: {datajob}")
|
logger.debug(f"Emitted DataHub Datajob start: {datajob}")
|
||||||
@ -536,7 +540,8 @@ class DataHubListener:
|
|||||||
self._extract_lineage(datajob, dagrun, task, task_instance, complete=True)
|
self._extract_lineage(datajob, dagrun, task, task_instance, complete=True)
|
||||||
|
|
||||||
for mcp in datajob.generate_mcp(
|
for mcp in datajob.generate_mcp(
|
||||||
materialize_iolets=self.config.materialize_iolets
|
generate_lineage=self.config.enable_datajob_lineage,
|
||||||
|
materialize_iolets=self.config.materialize_iolets,
|
||||||
):
|
):
|
||||||
self.emitter.emit(mcp, self._make_emit_callback())
|
self.emitter.emit(mcp, self._make_emit_callback())
|
||||||
logger.debug(f"Emitted DataHub Datajob finish w/ status {status}: {datajob}")
|
logger.debug(f"Emitted DataHub Datajob finish w/ status {status}: {datajob}")
|
||||||
|
@ -132,7 +132,10 @@ def datahub_task_status_callback(context, status):
|
|||||||
)
|
)
|
||||||
|
|
||||||
task.log.info(f"Emitting Datahub Datajob: {datajob}")
|
task.log.info(f"Emitting Datahub Datajob: {datajob}")
|
||||||
for mcp in datajob.generate_mcp(materialize_iolets=config.materialize_iolets):
|
for mcp in datajob.generate_mcp(
|
||||||
|
generate_lineage=config.enable_datajob_lineage,
|
||||||
|
materialize_iolets=config.materialize_iolets,
|
||||||
|
):
|
||||||
emitter.emit(mcp, _make_emit_callback(task.log))
|
emitter.emit(mcp, _make_emit_callback(task.log))
|
||||||
|
|
||||||
if config.capture_executions:
|
if config.capture_executions:
|
||||||
@ -199,7 +202,10 @@ def datahub_pre_execution(context):
|
|||||||
)
|
)
|
||||||
|
|
||||||
task.log.info(f"Emitting Datahub dataJob {datajob}")
|
task.log.info(f"Emitting Datahub dataJob {datajob}")
|
||||||
for mcp in datajob.generate_mcp(materialize_iolets=config.materialize_iolets):
|
for mcp in datajob.generate_mcp(
|
||||||
|
generate_lineage=config.enable_datajob_lineage,
|
||||||
|
materialize_iolets=config.materialize_iolets,
|
||||||
|
):
|
||||||
emitter.emit(mcp, _make_emit_callback(task.log))
|
emitter.emit(mcp, _make_emit_callback(task.log))
|
||||||
|
|
||||||
if config.capture_executions:
|
if config.capture_executions:
|
||||||
|
@ -56,7 +56,10 @@ def send_lineage_to_datahub(
|
|||||||
entities_to_datajob_urn_list([let.urn for let in inlets])
|
entities_to_datajob_urn_list([let.urn for let in inlets])
|
||||||
)
|
)
|
||||||
|
|
||||||
for mcp in datajob.generate_mcp(materialize_iolets=config.materialize_iolets):
|
for mcp in datajob.generate_mcp(
|
||||||
|
generate_lineage=config.enable_datajob_lineage,
|
||||||
|
materialize_iolets=config.materialize_iolets,
|
||||||
|
):
|
||||||
emitter.emit(mcp)
|
emitter.emit(mcp)
|
||||||
operator.log.info(f"Emitted from Lineage: {datajob}")
|
operator.log.info(f"Emitted from Lineage: {datajob}")
|
||||||
|
|
||||||
|
@ -0,0 +1,687 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataFlowInfo",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"_access_control": "None",
|
||||||
|
"catchup": "False",
|
||||||
|
"description": "'A simple DAG that runs a few fake data tasks.'",
|
||||||
|
"doc_md": "None",
|
||||||
|
"fileloc": "<fileloc>",
|
||||||
|
"is_paused_upon_creation": "None",
|
||||||
|
"start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))",
|
||||||
|
"tags": "[]",
|
||||||
|
"timezone": "Timezone('UTC')"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/tree?dag_id=simple_dag",
|
||||||
|
"name": "simple_dag",
|
||||||
|
"description": "A simple DAG that runs a few fake data tasks.",
|
||||||
|
"env": "PROD"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "ownership",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:airflow",
|
||||||
|
"type": "DEVELOPER",
|
||||||
|
"source": {
|
||||||
|
"type": "SERVICE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ownerTypes": {},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataFlow",
|
||||||
|
"entityUrn": "urn:li:dataFlow:(airflow,myairflow.simple_dag,prod)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "browsePathsV2",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"path": [
|
||||||
|
{
|
||||||
|
"id": "myairflow",
|
||||||
|
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "simple_dag"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataJobInfo",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"depends_on_past": "False",
|
||||||
|
"email": "None",
|
||||||
|
"label": "'task_1'",
|
||||||
|
"execution_timeout": "None",
|
||||||
|
"sla": "None",
|
||||||
|
"task_id": "'task_1'",
|
||||||
|
"trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
|
||||||
|
"wait_for_downstream": "False",
|
||||||
|
"downstream_task_ids": "['run_another_data_task']",
|
||||||
|
"inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.test_dag,PROD),test_task)')]",
|
||||||
|
"outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
|
||||||
|
"name": "task_1",
|
||||||
|
"type": {
|
||||||
|
"string": "COMMAND"
|
||||||
|
},
|
||||||
|
"env": "PROD"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "ownership",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:airflow",
|
||||||
|
"type": "DEVELOPER",
|
||||||
|
"source": {
|
||||||
|
"type": "SERVICE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ownerTypes": {},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceProperties",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"run_id": "manual_run_test",
|
||||||
|
"duration": "<duration>",
|
||||||
|
"start_date": "<start_date>",
|
||||||
|
"end_date": "<end_date>",
|
||||||
|
"execution_date": "2023-09-27 21:34:38+00:00",
|
||||||
|
"try_number": "0",
|
||||||
|
"max_tries": "0",
|
||||||
|
"external_executor_id": "None",
|
||||||
|
"state": "running",
|
||||||
|
"operator": "BashOperator",
|
||||||
|
"priority_weight": "2",
|
||||||
|
"log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
|
||||||
|
"orchestrator": "airflow",
|
||||||
|
"dag_id": "simple_dag",
|
||||||
|
"task_id": "task_1"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1",
|
||||||
|
"name": "simple_dag_task_1_manual_run_test",
|
||||||
|
"type": "BATCH_AD_HOC",
|
||||||
|
"created": {
|
||||||
|
"time": 1744380570011,
|
||||||
|
"actor": "urn:li:corpuser:datahub"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRelationships",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"upstreamInstances": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRunEvent",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1744380570011,
|
||||||
|
"partitionSpec": {
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT",
|
||||||
|
"type": "FULL_TABLE"
|
||||||
|
},
|
||||||
|
"status": "STARTED",
|
||||||
|
"attempt": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataJobInfo",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"depends_on_past": "False",
|
||||||
|
"email": "None",
|
||||||
|
"label": "'task_1'",
|
||||||
|
"execution_timeout": "None",
|
||||||
|
"sla": "None",
|
||||||
|
"task_id": "'task_1'",
|
||||||
|
"trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
|
||||||
|
"wait_for_downstream": "False",
|
||||||
|
"downstream_task_ids": "['run_another_data_task']",
|
||||||
|
"inlets": "[Dataset(platform='snowflake', name='mydb.schema.tableA', env='PROD', platform_instance=None), Urn(_urn='urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableC,PROD)'), Urn(_urn='urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.test_dag,PROD),test_task)')]",
|
||||||
|
"outlets": "[Dataset(platform='snowflake', name='mydb.schema.tableD', env='PROD', platform_instance=None)]"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=task_1",
|
||||||
|
"name": "task_1",
|
||||||
|
"type": {
|
||||||
|
"string": "COMMAND"
|
||||||
|
},
|
||||||
|
"env": "PROD"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "ownership",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:airflow",
|
||||||
|
"type": "DEVELOPER",
|
||||||
|
"source": {
|
||||||
|
"type": "SERVICE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ownerTypes": {},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),task_1)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:fdbbbcd638bc0e91bbf8d7775efbecaf",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRunEvent",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1744380570147,
|
||||||
|
"partitionSpec": {
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT",
|
||||||
|
"type": "FULL_TABLE"
|
||||||
|
},
|
||||||
|
"status": "COMPLETE",
|
||||||
|
"result": {
|
||||||
|
"type": "SUCCESS",
|
||||||
|
"nativeResultType": "airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataJobInfo",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"depends_on_past": "False",
|
||||||
|
"email": "None",
|
||||||
|
"label": "'run_another_data_task'",
|
||||||
|
"execution_timeout": "None",
|
||||||
|
"sla": "None",
|
||||||
|
"task_id": "'run_another_data_task'",
|
||||||
|
"trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
|
||||||
|
"wait_for_downstream": "False",
|
||||||
|
"downstream_task_ids": "[]",
|
||||||
|
"inlets": "[]",
|
||||||
|
"outlets": "[]"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
|
||||||
|
"name": "run_another_data_task",
|
||||||
|
"type": {
|
||||||
|
"string": "COMMAND"
|
||||||
|
},
|
||||||
|
"env": "PROD"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "ownership",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:airflow",
|
||||||
|
"type": "DEVELOPER",
|
||||||
|
"source": {
|
||||||
|
"type": "SERVICE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ownerTypes": {},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceProperties",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"run_id": "manual_run_test",
|
||||||
|
"duration": "<duration>",
|
||||||
|
"start_date": "<start_date>",
|
||||||
|
"end_date": "<end_date>",
|
||||||
|
"execution_date": "2023-09-27 21:34:38+00:00",
|
||||||
|
"try_number": "0",
|
||||||
|
"max_tries": "0",
|
||||||
|
"external_executor_id": "None",
|
||||||
|
"state": "running",
|
||||||
|
"operator": "BashOperator",
|
||||||
|
"priority_weight": "1",
|
||||||
|
"log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
|
||||||
|
"orchestrator": "airflow",
|
||||||
|
"dag_id": "simple_dag",
|
||||||
|
"task_id": "run_another_data_task"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1",
|
||||||
|
"name": "simple_dag_run_another_data_task_manual_run_test",
|
||||||
|
"type": "BATCH_AD_HOC",
|
||||||
|
"created": {
|
||||||
|
"time": 1744380573217,
|
||||||
|
"actor": "urn:li:corpuser:datahub"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRelationships",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"upstreamInstances": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRunEvent",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1744380573217,
|
||||||
|
"partitionSpec": {
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT",
|
||||||
|
"type": "FULL_TABLE"
|
||||||
|
},
|
||||||
|
"status": "STARTED",
|
||||||
|
"attempt": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataJobInfo",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"customProperties": {
|
||||||
|
"depends_on_past": "False",
|
||||||
|
"email": "None",
|
||||||
|
"label": "'run_another_data_task'",
|
||||||
|
"execution_timeout": "None",
|
||||||
|
"sla": "None",
|
||||||
|
"task_id": "'run_another_data_task'",
|
||||||
|
"trigger_rule": "<TriggerRule.ALL_SUCCESS: 'all_success'>",
|
||||||
|
"wait_for_downstream": "False",
|
||||||
|
"downstream_task_ids": "[]",
|
||||||
|
"inlets": "[]",
|
||||||
|
"outlets": "[]"
|
||||||
|
},
|
||||||
|
"externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=simple_dag&_flt_3_task_id=run_another_data_task",
|
||||||
|
"name": "run_another_data_task",
|
||||||
|
"type": {
|
||||||
|
"string": "COMMAND"
|
||||||
|
},
|
||||||
|
"env": "PROD"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataPlatformInstance",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"platform": "urn:li:dataPlatform:airflow",
|
||||||
|
"instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,myairflow)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "status",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"removed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "ownership",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"owners": [
|
||||||
|
{
|
||||||
|
"owner": "urn:li:corpuser:airflow",
|
||||||
|
"type": "DEVELOPER",
|
||||||
|
"source": {
|
||||||
|
"type": "SERVICE"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ownerTypes": {},
|
||||||
|
"lastModified": {
|
||||||
|
"time": 0,
|
||||||
|
"actor": "urn:li:corpuser:airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataJob",
|
||||||
|
"entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,myairflow.simple_dag,prod),run_another_data_task)",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "globalTags",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entityType": "dataProcessInstance",
|
||||||
|
"entityUrn": "urn:li:dataProcessInstance:888f71b79d9a0b162fe44acad7b2c2ae",
|
||||||
|
"changeType": "UPSERT",
|
||||||
|
"aspectName": "dataProcessInstanceRunEvent",
|
||||||
|
"aspect": {
|
||||||
|
"json": {
|
||||||
|
"timestampMillis": 1744380573438,
|
||||||
|
"partitionSpec": {
|
||||||
|
"partition": "FULL_TABLE_SNAPSHOT",
|
||||||
|
"type": "FULL_TABLE"
|
||||||
|
},
|
||||||
|
"status": "COMPLETE",
|
||||||
|
"result": {
|
||||||
|
"type": "SUCCESS",
|
||||||
|
"nativeResultType": "airflow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -182,6 +182,7 @@ def _run_airflow(
|
|||||||
is_v1: bool,
|
is_v1: bool,
|
||||||
multiple_connections: bool,
|
multiple_connections: bool,
|
||||||
platform_instance: Optional[str],
|
platform_instance: Optional[str],
|
||||||
|
enable_datajob_lineage: bool,
|
||||||
) -> Iterator[AirflowInstance]:
|
) -> Iterator[AirflowInstance]:
|
||||||
airflow_home = tmp_path / "airflow_home"
|
airflow_home = tmp_path / "airflow_home"
|
||||||
print(f"Using airflow home: {airflow_home}")
|
print(f"Using airflow home: {airflow_home}")
|
||||||
@ -257,6 +258,9 @@ def _run_airflow(
|
|||||||
"AIRFLOW__DATAHUB__LOG_LEVEL": "DEBUG",
|
"AIRFLOW__DATAHUB__LOG_LEVEL": "DEBUG",
|
||||||
"AIRFLOW__DATAHUB__DEBUG_EMITTER": "True",
|
"AIRFLOW__DATAHUB__DEBUG_EMITTER": "True",
|
||||||
"SQLALCHEMY_SILENCE_UBER_WARNING": "1",
|
"SQLALCHEMY_SILENCE_UBER_WARNING": "1",
|
||||||
|
"AIRFLOW__DATAHUB__ENABLE_DATAJOB_LINEAGE": "true"
|
||||||
|
if enable_datajob_lineage
|
||||||
|
else "false",
|
||||||
}
|
}
|
||||||
|
|
||||||
if platform_instance:
|
if platform_instance:
|
||||||
@ -371,12 +375,27 @@ class DagTestCase:
|
|||||||
v2_only: bool = False
|
v2_only: bool = False
|
||||||
multiple_connections: bool = False
|
multiple_connections: bool = False
|
||||||
platform_instance: Optional[str] = None
|
platform_instance: Optional[str] = None
|
||||||
|
enable_datajob_lineage: bool = True
|
||||||
|
|
||||||
|
# used to identify the test case in the golden file when same DAG is used in multiple tests
|
||||||
|
test_variant: Optional[str] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dag_test_id(self) -> str:
|
||||||
|
return f"{self.dag_id}{self.test_variant or ''}"
|
||||||
|
|
||||||
|
|
||||||
test_cases = [
|
test_cases = [
|
||||||
DagTestCase(
|
DagTestCase(
|
||||||
"simple_dag", multiple_connections=True, platform_instance=PLATFORM_INSTANCE
|
"simple_dag", multiple_connections=True, platform_instance=PLATFORM_INSTANCE
|
||||||
),
|
),
|
||||||
|
DagTestCase(
|
||||||
|
"simple_dag",
|
||||||
|
multiple_connections=True,
|
||||||
|
platform_instance=PLATFORM_INSTANCE,
|
||||||
|
enable_datajob_lineage=False,
|
||||||
|
test_variant="_no_datajob_lineage",
|
||||||
|
),
|
||||||
DagTestCase("basic_iolets", platform_instance=PLATFORM_INSTANCE),
|
DagTestCase("basic_iolets", platform_instance=PLATFORM_INSTANCE),
|
||||||
DagTestCase("dag_to_skip", v2_only=True, platform_instance=PLATFORM_INSTANCE),
|
DagTestCase("dag_to_skip", v2_only=True, platform_instance=PLATFORM_INSTANCE),
|
||||||
DagTestCase("snowflake_operator", success=False, v2_only=True),
|
DagTestCase("snowflake_operator", success=False, v2_only=True),
|
||||||
@ -395,10 +414,10 @@ test_cases = [
|
|||||||
[
|
[
|
||||||
*[
|
*[
|
||||||
pytest.param(
|
pytest.param(
|
||||||
f"v1_{test_case.dag_id}",
|
f"v1_{test_case.dag_test_id}",
|
||||||
test_case,
|
test_case,
|
||||||
True,
|
True,
|
||||||
id=f"v1_{test_case.dag_id}",
|
id=f"v1_{test_case.dag_test_id}",
|
||||||
marks=pytest.mark.skipif(
|
marks=pytest.mark.skipif(
|
||||||
AIRFLOW_VERSION >= packaging.version.parse("2.4.0"),
|
AIRFLOW_VERSION >= packaging.version.parse("2.4.0"),
|
||||||
reason="We only test the v1 plugin on Airflow 2.3",
|
reason="We only test the v1 plugin on Airflow 2.3",
|
||||||
@ -411,16 +430,16 @@ test_cases = [
|
|||||||
pytest.param(
|
pytest.param(
|
||||||
# On Airflow 2.3-2.4, test plugin v2 without dataFlows.
|
# On Airflow 2.3-2.4, test plugin v2 without dataFlows.
|
||||||
(
|
(
|
||||||
f"v2_{test_case.dag_id}"
|
f"v2_{test_case.dag_test_id}"
|
||||||
if HAS_AIRFLOW_DAG_LISTENER_API
|
if HAS_AIRFLOW_DAG_LISTENER_API
|
||||||
else f"v2_{test_case.dag_id}_no_dag_listener"
|
else f"v2_{test_case.dag_test_id}_no_dag_listener"
|
||||||
),
|
),
|
||||||
test_case,
|
test_case,
|
||||||
False,
|
False,
|
||||||
id=(
|
id=(
|
||||||
f"v2_{test_case.dag_id}"
|
f"v2_{test_case.dag_test_id}"
|
||||||
if HAS_AIRFLOW_DAG_LISTENER_API
|
if HAS_AIRFLOW_DAG_LISTENER_API
|
||||||
else f"v2_{test_case.dag_id}_no_dag_listener"
|
else f"v2_{test_case.dag_test_id}_no_dag_listener"
|
||||||
),
|
),
|
||||||
marks=[
|
marks=[
|
||||||
pytest.mark.skipif(
|
pytest.mark.skipif(
|
||||||
@ -466,6 +485,7 @@ def test_airflow_plugin(
|
|||||||
is_v1=is_v1,
|
is_v1=is_v1,
|
||||||
multiple_connections=test_case.multiple_connections,
|
multiple_connections=test_case.multiple_connections,
|
||||||
platform_instance=test_case.platform_instance,
|
platform_instance=test_case.platform_instance,
|
||||||
|
enable_datajob_lineage=test_case.enable_datajob_lineage,
|
||||||
) as airflow_instance:
|
) as airflow_instance:
|
||||||
print(f"Running DAG {dag_id}...")
|
print(f"Running DAG {dag_id}...")
|
||||||
_wait_for_dag_to_load(airflow_instance, dag_id)
|
_wait_for_dag_to_load(airflow_instance, dag_id)
|
||||||
@ -573,6 +593,7 @@ if __name__ == "__main__":
|
|||||||
is_v1=not HAS_AIRFLOW_LISTENER_API,
|
is_v1=not HAS_AIRFLOW_LISTENER_API,
|
||||||
multiple_connections=False,
|
multiple_connections=False,
|
||||||
platform_instance=None,
|
platform_instance=None,
|
||||||
|
enable_datajob_lineage=True,
|
||||||
) as airflow_instance:
|
) as airflow_instance:
|
||||||
# input("Press enter to exit...")
|
# input("Press enter to exit...")
|
||||||
print("quitting airflow")
|
print("quitting airflow")
|
||||||
|
@ -108,7 +108,9 @@ class DataJob:
|
|||||||
return [tags]
|
return [tags]
|
||||||
|
|
||||||
def generate_mcp(
|
def generate_mcp(
|
||||||
self, materialize_iolets: bool = True
|
self,
|
||||||
|
generate_lineage: bool = True,
|
||||||
|
materialize_iolets: bool = True,
|
||||||
) -> Iterable[MetadataChangeProposalWrapper]:
|
) -> Iterable[MetadataChangeProposalWrapper]:
|
||||||
env: Optional[str] = None
|
env: Optional[str] = None
|
||||||
if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
|
if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
|
||||||
@ -152,9 +154,10 @@ class DataJob:
|
|||||||
)
|
)
|
||||||
yield mcp
|
yield mcp
|
||||||
|
|
||||||
yield from self.generate_data_input_output_mcp(
|
if generate_lineage:
|
||||||
materialize_iolets=materialize_iolets
|
yield from self.generate_data_input_output_mcp(
|
||||||
)
|
materialize_iolets=materialize_iolets
|
||||||
|
)
|
||||||
|
|
||||||
for owner in self.generate_ownership_aspect():
|
for owner in self.generate_ownership_aspect():
|
||||||
mcp = MetadataChangeProposalWrapper(
|
mcp = MetadataChangeProposalWrapper(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user