| 
									
										
										
										
											2021-12-01 12:46:28 +05:30
										 |  |  | #  Copyright 2021 Collate | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							|  |  |  | #  http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  | """
 | 
					
						
							|  |  |  | OpenMetadata Airflow Lineage Backend example. Airflow provides a pluggable lineage backend that can | 
					
						
							|  |  |  | read a DAG's configured inlets and outlets to compose a lineage. With OpenMetadata we have an Airflow lineage backend | 
					
						
							|  |  |  | to get all of the workflows in Airflow and also any lineage that users have configured. | 
					
						
							| 
									
										
										
										
											2022-12-02 20:12:06 +05:30
										 |  |  | Please refer to https://docs.open-metadata.org/connectors/pipeline/airflow/lineage-backend on how to configure the lineage backend | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  | with the Airflow Scheduler. | 
					
						
							|  |  |  | This is an example to demonstrate how to configure an Airflow DAG's inlets and outlets. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from datetime import timedelta | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from airflow.decorators import dag, task | 
					
						
							|  |  |  | from airflow.utils.dates import days_ago | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  | from metadata.generated.schema.entity.data.container import Container | 
					
						
							|  |  |  | from metadata.generated.schema.entity.data.table import Table | 
					
						
							|  |  |  | from metadata.ingestion.source.pipeline.airflow.lineage_parser import OMEntity | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  | default_args = { | 
					
						
							|  |  |  |     "owner": "openmetadata_airflow_example", | 
					
						
							|  |  |  |     "depends_on_past": False, | 
					
						
							|  |  |  |     "email": ["user@company.com"], | 
					
						
							|  |  |  |     "execution_timeout": timedelta(minutes=5), | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @dag( | 
					
						
							|  |  |  |     default_args=default_args, | 
					
						
							| 
									
										
										
										
											2022-05-25 08:35:16 +02:00
										 |  |  |     dag_id="sample_lineage", | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  |     description="OpenMetadata Airflow Lineage example DAG", | 
					
						
							|  |  |  |     schedule_interval=timedelta(days=1), | 
					
						
							|  |  |  |     start_date=days_ago(1), | 
					
						
							|  |  |  |     catchup=False, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | def openmetadata_airflow_lineage_example(): | 
					
						
							|  |  |  |     @task( | 
					
						
							|  |  |  |         inlets={ | 
					
						
							|  |  |  |             "tables": [ | 
					
						
							| 
									
										
										
										
											2022-05-25 08:35:16 +02:00
										 |  |  |                 "sample_data.ecommerce_db.shopify.raw_order", | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  |             ], | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2022-05-25 08:35:16 +02:00
										 |  |  |         outlets={"tables": ["sample_data.ecommerce_db.shopify.fact_order"]}, | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  |     ) | 
					
						
							|  |  |  |     def generate_data(): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |     @task( | 
					
						
							|  |  |  |         inlets=[ | 
					
						
							|  |  |  |             OMEntity(entity=Container, fqn="s3_storage_sample.transactions", key="test") | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         outlets=[ | 
					
						
							|  |  |  |             OMEntity( | 
					
						
							|  |  |  |                 entity=Table, | 
					
						
							|  |  |  |                 fqn="sample_data.ecommerce_db.shopify.raw_order", | 
					
						
							|  |  |  |                 key="test", | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     def generate_data2(): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-15 15:08:09 +02:00
										 |  |  |     @task( | 
					
						
							|  |  |  |         inlets=[ | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "entity": "container", | 
					
						
							|  |  |  |                 "fqn": "s3_storage_sample.departments", | 
					
						
							|  |  |  |                 "key": "test", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         outlets=[ | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "entity": "table", | 
					
						
							|  |  |  |                 "fqn": "sample_data.ecommerce_db.shopify.raw_order", | 
					
						
							|  |  |  |                 "key": "test", | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     def generate_data3(): | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  |     generate_data() | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |     generate_data2() | 
					
						
							| 
									
										
										
										
											2024-05-15 15:08:09 +02:00
										 |  |  |     generate_data3() | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-11-29 16:57:39 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-12 21:03:15 -07:00
										 |  |  | openmetadata_airflow_lineage_example_dag = openmetadata_airflow_lineage_example() |