| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Copyright 2025 Collate | 
					
						
							|  |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | You can run this DAG from the default OM installation. | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | For this DAG to run properly we expect an OpenMetadata | 
					
						
							|  |  |  | Airflow connection named `openmetadata_conn_id`. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | from textwrap import dedent | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | import requests | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | # The DAG object; we'll need this to instantiate a DAG | 
					
						
							|  |  |  | from airflow import DAG | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Operators; we need this to operate! | 
					
						
							|  |  |  | from airflow.operators.bash import BashOperator | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | from airflow_provider_openmetadata.hooks.openmetadata import OpenMetadataHook | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | # These args will get passed on to each operator | 
					
						
							|  |  |  | # You can override them on a per-task basis during operator initialization | 
					
						
							|  |  |  | from airflow_provider_openmetadata.lineage.operator import OpenMetadataLineageOperator | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | OM_HOST_PORT = "http://localhost:8585/api" | 
					
						
							|  |  |  | OM_JWT = "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" | 
					
						
							|  |  |  | AIRFLOW_HOST_API_ROOT = "http://localhost:8080/api/v1/" | 
					
						
							|  |  |  | DEFAULT_OM_AIRFLOW_CONNECTION = "openmetadata_conn_id" | 
					
						
							|  |  |  | DEFAULT_AIRFLOW_HEADERS = { | 
					
						
							|  |  |  |     "Content-Type": "application/json", | 
					
						
							|  |  |  |     "Authorization": "Basic YWRtaW46YWRtaW4=", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | default_args = { | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |     "retries": 0, | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | # Create the default OpenMetadata Airflow Connection (if it does not exist) | 
					
						
							|  |  |  | res = requests.get( | 
					
						
							|  |  |  |     AIRFLOW_HOST_API_ROOT + f"connections/{DEFAULT_OM_AIRFLOW_CONNECTION}", | 
					
						
							|  |  |  |     headers=DEFAULT_AIRFLOW_HEADERS, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | if res.status_code == 404:  # not found | 
					
						
							|  |  |  |     requests.post( | 
					
						
							|  |  |  |         AIRFLOW_HOST_API_ROOT + "connections", | 
					
						
							|  |  |  |         json={ | 
					
						
							|  |  |  |             "connection_id": DEFAULT_OM_AIRFLOW_CONNECTION, | 
					
						
							|  |  |  |             "conn_type": "openmetadata", | 
					
						
							|  |  |  |             "host": "openmetadata-server", | 
					
						
							|  |  |  |             "schema": "http", | 
					
						
							|  |  |  |             "port": 8585, | 
					
						
							|  |  |  |             "password": OM_JWT, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         headers=DEFAULT_AIRFLOW_HEADERS, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  | elif res.status_code != 200: | 
					
						
							|  |  |  |     raise RuntimeError(f"Could not fetch {DEFAULT_OM_AIRFLOW_CONNECTION} connection") | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | with DAG( | 
					
						
							|  |  |  |     "lineage_tutorial_operator", | 
					
						
							|  |  |  |     default_args=default_args, | 
					
						
							|  |  |  |     description="A simple tutorial DAG", | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |     schedule_interval=None, | 
					
						
							|  |  |  |     is_paused_upon_creation=True, | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  |     start_date=datetime(2021, 1, 1), | 
					
						
							|  |  |  |     catchup=False, | 
					
						
							|  |  |  |     tags=["example"], | 
					
						
							|  |  |  | ) as dag: | 
					
						
							|  |  |  |     # t1, t2 and t3 are examples of tasks created by instantiating operators | 
					
						
							|  |  |  |     t1 = BashOperator( | 
					
						
							|  |  |  |         task_id="print_date", | 
					
						
							|  |  |  |         bash_command="date", | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |         outlets={ | 
					
						
							|  |  |  |             "tables": [ | 
					
						
							|  |  |  |                 "test-service-table-lineage.test-db.test-schema.lineage-test-outlet" | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     t2 = BashOperator( | 
					
						
							|  |  |  |         task_id="sleep", | 
					
						
							|  |  |  |         depends_on_past=False, | 
					
						
							|  |  |  |         bash_command="sleep 1", | 
					
						
							|  |  |  |         retries=3, | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |         inlets={ | 
					
						
							|  |  |  |             "tables": [ | 
					
						
							| 
									
										
										
										
											2023-06-12 07:01:19 +02:00
										 |  |  |                 "test-service-table-lineage.test-db.test-schema.lineage-test-inlet", | 
					
						
							|  |  |  |                 "test-service-table-lineage.test-db.test-schema.lineage-test-inlet2", | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |             ] | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dag.doc_md = ( | 
					
						
							|  |  |  |         __doc__  # providing that you have a docstring at the beginning of the DAG | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     dag.doc_md = """
 | 
					
						
							|  |  |  |     This is a documentation placed anywhere | 
					
						
							|  |  |  |     """  # otherwise, type it like this
 | 
					
						
							|  |  |  |     templated_command = dedent( | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |     {% for i in range(5) %} | 
					
						
							|  |  |  |         echo "{{ ds }}" | 
					
						
							|  |  |  |         echo "{{ macros.ds_add(ds, 7)}}" | 
					
						
							|  |  |  |         echo "{{ params.my_param }}" | 
					
						
							|  |  |  |     {% endfor %} | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     t3 = BashOperator( | 
					
						
							|  |  |  |         task_id="templated", | 
					
						
							|  |  |  |         depends_on_past=False, | 
					
						
							|  |  |  |         bash_command=templated_command, | 
					
						
							|  |  |  |         params={"my_param": "Parameter I passed in"}, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     t1 >> [t2, t3] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     t4 = OpenMetadataLineageOperator( | 
					
						
							|  |  |  |         task_id="lineage_op", | 
					
						
							|  |  |  |         depends_on_past=False, | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |         server_config=OpenMetadataHook(DEFAULT_OM_AIRFLOW_CONNECTION).get_conn(), | 
					
						
							| 
									
										
										
										
											2022-12-10 19:54:41 +01:00
										 |  |  |         service_name="airflow_lineage_op_service", | 
					
						
							|  |  |  |         only_keep_dag_lineage=True, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-17 04:52:12 +01:00
										 |  |  |     [t1, t2, t3] >> t4 |