| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Copyright 2025 Collate | 
					
						
							|  |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | Usage DAG function builder | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2022-12-14 21:14:51 +05:30
										 |  |  | import tempfile | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | from airflow import DAG | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  | from openmetadata_managed_apis.utils.logger import set_operator_logger | 
					
						
							| 
									
										
										
										
											2022-07-28 14:46:25 +02:00
										 |  |  | from openmetadata_managed_apis.workflows.ingestion.common import ( | 
					
						
							| 
									
										
										
										
											2022-04-21 17:53:29 +02:00
										 |  |  |     build_dag, | 
					
						
							| 
									
										
										
										
											2022-06-02 20:40:53 +02:00
										 |  |  |     build_source, | 
					
						
							| 
									
										
										
										
											2022-04-28 12:20:03 +02:00
										 |  |  |     build_workflow_config_property, | 
					
						
							| 
									
										
										
										
											2025-04-29 08:19:13 +02:00
										 |  |  |     execute_workflow, | 
					
						
							| 
									
										
										
										
											2022-04-21 17:53:29 +02:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-14 21:14:51 +05:30
										 |  |  | from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import ( | 
					
						
							|  |  |  |     IngestionPipeline, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2022-04-20 14:10:40 +02:00
										 |  |  | from metadata.generated.schema.metadataIngestion.workflow import ( | 
					
						
							|  |  |  |     BulkSink, | 
					
						
							|  |  |  |     OpenMetadataWorkflowConfig, | 
					
						
							|  |  |  |     Processor, | 
					
						
							|  |  |  |     Stage, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  | from metadata.workflow.usage import UsageWorkflow | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-29 08:19:13 +02:00
										 |  |  | def usage_workflow( | 
					
						
							|  |  |  |     workflow_config: OpenMetadataWorkflowConfig, | 
					
						
							|  |  |  | ): | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  |     """
 | 
					
						
							|  |  |  |     Task that creates and runs the ingestion workflow. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The workflow_config gets cooked form the incoming | 
					
						
							|  |  |  |     ingestionPipeline. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     This is the callable used to create the PythonOperator | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     set_operator_logger(workflow_config) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-25 17:51:49 +01:00
										 |  |  |     config = json.loads( | 
					
						
							|  |  |  |         workflow_config.model_dump_json(exclude_defaults=False, mask_secrets=False) | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  |     workflow = UsageWorkflow.create(config) | 
					
						
							| 
									
										
										
										
											2025-04-29 08:19:13 +02:00
										 |  |  |     execute_workflow(workflow, workflow_config) | 
					
						
							| 
									
										
										
										
											2022-04-20 14:10:40 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 09:21:36 +02:00
										 |  |  | def build_usage_config_from_file( | 
					
						
							|  |  |  |     ingestion_pipeline: IngestionPipeline, filename: str | 
					
						
							|  |  |  | ) -> OpenMetadataWorkflowConfig: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Given a filename for the staging location, build | 
					
						
							|  |  |  |     the OpenMetadataWorkflowConfig | 
					
						
							|  |  |  |     :param ingestion_pipeline: IngestionPipeline with workflow info | 
					
						
							|  |  |  |     :param filename: staging location file | 
					
						
							|  |  |  |     :return: OpenMetadataWorkflowConfig | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-06-02 20:40:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     source = build_source(ingestion_pipeline) | 
					
						
							|  |  |  |     source.type = f"{source.type}-usage"  # Mark the source as usage | 
					
						
							| 
									
										
										
										
											2022-05-26 09:21:36 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return OpenMetadataWorkflowConfig( | 
					
						
							| 
									
										
										
										
											2022-06-02 20:40:53 +02:00
										 |  |  |         source=source, | 
					
						
							| 
									
										
										
										
											2022-06-27 22:24:20 +05:30
										 |  |  |         processor=Processor(type="query-parser", config={}), | 
					
						
							| 
									
										
										
										
											2022-05-26 09:21:36 +02:00
										 |  |  |         stage=Stage( | 
					
						
							|  |  |  |             type="table-usage", | 
					
						
							|  |  |  |             config={"filename": filename}, | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         bulkSink=BulkSink( | 
					
						
							|  |  |  |             type="metadata-usage", | 
					
						
							|  |  |  |             config={"filename": filename}, | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         workflowConfig=build_workflow_config_property(ingestion_pipeline), | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.root, | 
					
						
							| 
									
										
										
										
											2022-05-26 09:21:36 +02:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-20 14:10:40 +02:00
										 |  |  | def build_usage_workflow_config( | 
					
						
							|  |  |  |     ingestion_pipeline: IngestionPipeline, | 
					
						
							|  |  |  | ) -> OpenMetadataWorkflowConfig: | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  |     """
 | 
					
						
							|  |  |  |     Given an airflow_pipeline, prepare the workflow config JSON | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2022-06-02 20:40:53 +02:00
										 |  |  |     location = ingestion_pipeline.sourceConfig.config.stageFileLocation | 
					
						
							| 
									
										
										
										
											2022-04-20 14:10:40 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-05-26 09:21:36 +02:00
										 |  |  |     if not location: | 
					
						
							|  |  |  |         with tempfile.NamedTemporaryFile() as tmp_file: | 
					
						
							|  |  |  |             workflow_config = build_usage_config_from_file(ingestion_pipeline, tmp_file) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         workflow_config = build_usage_config_from_file(ingestion_pipeline, location) | 
					
						
							| 
									
										
										
										
											2022-04-20 14:10:40 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return workflow_config | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def build_usage_dag(airflow_pipeline: IngestionPipeline) -> DAG: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Build a simple metadata workflow DAG | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     workflow_config = build_usage_workflow_config(airflow_pipeline) | 
					
						
							| 
									
										
										
										
											2022-04-21 17:53:29 +02:00
										 |  |  |     dag = build_dag( | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  |         task_name="usage_task", | 
					
						
							|  |  |  |         ingestion_pipeline=airflow_pipeline, | 
					
						
							|  |  |  |         workflow_config=workflow_config, | 
					
						
							| 
									
										
										
										
											2023-08-30 15:49:42 +02:00
										 |  |  |         workflow_fn=usage_workflow, | 
					
						
							| 
									
										
										
										
											2022-04-11 18:38:26 +02:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return dag |