# Airflow

We highly recommend using Airflow or a similar scheduler to run the metadata connectors. Below is a sample DAG you can use as a reference when integrating with Airflow.

## Airflow Example for Sample Data

```python
import pathlib
import json
from datetime import timedelta

from airflow import DAG
from airflow.utils.dates import days_ago

try:
    from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
    from airflow.operators.python_operator import PythonOperator

from metadata.config.common import load_config_file
from metadata.ingestion.api.workflow import Workflow

default_args = {
    "owner": "user_name",
    "email": ["username@org.com"],
    "email_on_failure": True,
    "retries": 3,
    "retry_delay": timedelta(minutes=5),
    "execution_timeout": timedelta(minutes=60),
}

# Workflow configuration: the sample-data source, the REST sink, and the
# OpenMetadata server connection.
config = """
{
  "source": {
    "type": "sample-data",
    "config": {
      "sample_data_folder": "./examples/sample_data"
    }
  },
  "sink": {
    "type": "metadata-rest",
    "config": {}
  },
  "metadata_server": {
    "type": "metadata-server",
    "config": {
      "api_endpoint": "http://localhost:8585/api",
      "auth_provider_type": "no-auth"
    }
  }
}
"""


def metadata_ingestion_workflow():
    # Parse the configuration and run the ingestion workflow end to end.
    workflow_config = json.loads(config)
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()


with DAG(
    "sample_data",
    default_args=default_args,
    description="An example DAG which runs an OpenMetadata ingestion workflow",
    start_date=days_ago(1),
    is_paused_upon_creation=False,
    schedule_interval='*/5 * * * *',
    catchup=False,
) as dag:
    ingest_task = PythonOperator(
        task_id="ingest_using_recipe",
        # Pass the callable itself; Airflow invokes it when the task runs.
        python_callable=metadata_ingestion_workflow,
    )
```

The DAG wraps the ingestion in a Python method like the one below:

```python
def metadata_ingestion_workflow():
    workflow_config = json.loads(config)
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()
```

The method creates a `Workflow` instance and passes it the sample-data configuration, which reads metadata from JSON files and ingests it into the OpenMetadata server. You can customize this configuration or plug in a different connector, as sketched below; for more, refer to our [examples](https://github.com/open-metadata/OpenMetadata/tree/main/ingestion/examples/workflows) and [Connectors](connectors/).
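
For instance, only the `source` block of the configuration needs to change in order to ingest from another system. The sketch below swaps in a hypothetical MySQL source; the connector type and its config keys (`username`, `password`, `host_port`, `service_name`) are illustrative assumptions, so check the linked examples for the exact options each connector supports.

```python
# Illustrative only: the source type and its config keys are assumptions;
# see the linked workflow examples for the options each connector accepts.
config = """
{
  "source": {
    "type": "mysql",
    "config": {
      "username": "openmetadata_user",
      "password": "openmetadata_password",
      "host_port": "localhost:3306",
      "service_name": "local_mysql"
    }
  },
  "sink": {
    "type": "metadata-rest",
    "config": {}
  },
  "metadata_server": {
    "type": "metadata-server",
    "config": {
      "api_endpoint": "http://localhost:8585/api",
      "auth_provider_type": "no-auth"
    }
  }
}
"""
```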

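The example above already imports `pathlib` and `load_config_file`, so you can also keep the configuration in a standalone file instead of an inline string. This is a minimal sketch, assuming `load_config_file` accepts a path to the config file and returns the parsed configuration; the file path below is hypothetical.

```python
import pathlib

from metadata.config.common import load_config_file
from metadata.ingestion.api.workflow import Workflow


def metadata_ingestion_workflow():
    # Hypothetical path: point this at wherever your workflow config lives.
    config_file = pathlib.Path("/opt/airflow/dags/pipelines/sample_data.json")
    workflow_config = load_config_file(config_file)
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()
```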