2021-04-12 17:40:15 -07:00
|
|
|
"""Lineage Backend

An example DAG demonstrating the usage of DataHub's Airflow lineage backend.
"""
|
|
|
|
|
|
|
|
from datetime import timedelta
|
|
|
|
|
|
|
|
from airflow import DAG
|
2022-11-01 21:12:34 -07:00
|
|
|
from airflow.operators.bash import BashOperator
|
2021-04-12 17:40:15 -07:00
|
|
|
from airflow.utils.dates import days_ago
|
|
|
|
|
2021-05-12 15:01:11 -07:00
|
|
|
from datahub_provider.entities import Dataset
|
2021-04-12 17:40:15 -07:00
|
|
|
|
|
|
|
# Arguments shared by the tasks of this DAG; handed to the DAG constructor
# through its ``default_args`` parameter.
default_args = {
    "owner": "airflow",
    # Each run is independent of the outcome of the previous run.
    "depends_on_past": False,
    # Placeholder address; failure e-mails are switched off just below.
    "email": ["jdoe@example.com"],
    "email_on_failure": False,
    # Upper bound on how long a single task execution may run.
    "execution_timeout": timedelta(minutes=5),
}
|
|
|
|
|
|
|
|
|
|
|
|
# Demo DAG: one Bash task annotated with dataset-level lineage. The DAG is
# scheduled once per day, starts two days in the past, and does not backfill
# the missed intervals (catchup is disabled).
with DAG(
    "datahub_lineage_backend_demo",
    default_args=default_args,
    description="An example DAG demonstrating the usage of DataHub's Airflow lineage backend.",
    schedule_interval=timedelta(days=1),
    start_date=days_ago(2),
    tags=["example_tag"],
    catchup=False,
) as dag:
    task1 = BashOperator(
        task_id="run_data_task",
        dag=dag,
        # Stand-in command; a real pipeline would invoke its data tooling here.
        bash_command="echo 'This is where you might run your data tooling.'",
        # Lineage annotations: datasets this task reads from (inlets) and
        # writes to (outlets). Each Dataset is given a platform name and a
        # fully qualified table name.
        inlets=[
            Dataset("snowflake", "mydb.schema.tableA"),
            Dataset("snowflake", "mydb.schema.tableB"),
        ],
        outlets=[Dataset("snowflake", "mydb.schema.tableC")],
    )
|