mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-04 12:36:23 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			90 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			90 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#  Copyright 2025 Collate
#  Licensed under the Collate Community License, Version 1.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
 | 
						|
 | 
						|
"""
This DAG can be used directly in your Airflow instance after installing
the `openmetadata-ingestion[airflow-container]` package. Its purpose
is to connect to the underlying database, retrieve the information
and push it to OpenMetadata.
"""
 | 
						|
from datetime import timedelta
 | 
						|
 | 
						|
import yaml
 | 
						|
from airflow import DAG
 | 
						|
 | 
						|
try:
 | 
						|
    from airflow.operators.python import PythonOperator
 | 
						|
except ModuleNotFoundError:
 | 
						|
    from airflow.operators.python_operator import PythonOperator
 | 
						|
 | 
						|
from airflow.utils.dates import days_ago
 | 
						|
 | 
						|
from metadata.workflow.metadata import MetadataWorkflow
 | 
						|
 | 
						|
# Operator-level defaults; handed to the DAG below through `default_args=`.
default_args = dict(
    owner="user_name",
    email=["username@org.com"],
    email_on_failure=False,
    retries=3,
    retry_delay=timedelta(minutes=5),
    execution_timeout=timedelta(minutes=60),
)

# YAML ingestion recipe; parsed with `yaml.safe_load` inside
# `metadata_ingestion_workflow` and passed to `MetadataWorkflow.create`.
# NOTE(review): host/port values and the jwtToken are hard-coded example
# values — presumably they must be replaced per deployment; confirm.
config = """
source:
  type: airflow
  serviceName: airflow_source
  serviceConnection:
    config:
      type: Airflow
      hostPort: http://localhost:8080
      numberOfStatus: 10
      connection:
        type: Backend
  sourceConfig:
    config:
      type: PipelineMetadata
sink:
  type: metadata-rest
  config: {}
workflowConfig:
  loggerLevel: INFO
  openMetadataServerConfig:
    hostPort: http://openmetadata-server:8585/api
    authProvider: openmetadata
    securityConfig:
      jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
"""
 | 
						|
 | 
						|
 | 
						|
def metadata_ingestion_workflow():
    """Run the metadata ingestion described by the module-level ``config``.

    The YAML recipe is parsed with ``yaml.safe_load`` and handed to
    ``MetadataWorkflow.create``. The workflow is then executed, its status
    is checked and printed, and finally the workflow is stopped.

    ``workflow.stop()`` runs in a ``finally`` clause so the workflow is
    always stopped, even when ``execute()``, ``raise_from_status()`` or
    ``print_status()`` raises. The exception itself still propagates to the
    caller.
    """
    recipe = yaml.safe_load(config)
    workflow = MetadataWorkflow.create(recipe)
    try:
        workflow.execute()
        # Presumably raises when the run reported failures — confirm
        # against the MetadataWorkflow API.
        workflow.raise_from_status()
        workflow.print_status()
    finally:
        # Stop the workflow on both the success and failure paths.
        workflow.stop()
 | 
						|
 | 
						|
 | 
						|
# Example DAG: a single PythonOperator task that runs the ingestion callable.
# NOTE(review): `days_ago` and `schedule_interval` are reported as deprecated
# in newer Airflow releases — confirm the target Airflow version before
# changing them.
with DAG(
    dag_id="airflow_metadata_extraction",
    description="An example DAG which pushes Airflow data to OM",
    default_args=default_args,
    # Scheduling: cron expression for every five minutes, no catchup runs.
    schedule_interval="*/5 * * * *",
    start_date=days_ago(1),
    catchup=False,
    is_paused_upon_creation=True,
) as dag:
    ingest_task = PythonOperator(
        python_callable=metadata_ingestion_workflow,
        task_id="ingest_using_recipe",
    )
 |