2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								title: Run the Airflow Connector Externally
							 
						 
					
						
							
								
									
										
										
										
											2025-07-17 11:40:52 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								description: Use YAML to configure Airflow pipeline metadata ingestion including DAGs, tasks, scheduling, and lineage mapping.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								slug: /connectors/pipeline/airflow/yaml
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% connectorDetailsHeader
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								name="Airflow"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								stage="PROD"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								platform="OpenMetadata"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								availableFeatures=["Pipelines", "Pipeline Status", "Lineage", "Owners"]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								unavailableFeatures=["Tags"]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/ %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-03-10 12:25:59 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								In this section, we provide guides and references to use the Airflow connector.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-03-10 12:25:59 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								Configure and schedule Airflow metadata workflows externally using a YAML configuration:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Requirements ](#requirements ) 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Metadata Ingestion ](#metadata-ingestion ) 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/external-ingestion-deployment.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## Requirements
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### Python Requirements
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/python-requirements.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								To run the Airflow ingestion, you will need to install:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								pip3 install "openmetadata-ingestion[airflow]"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% note %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Note that this installs the same Airflow version that we ship in the Ingestion Container. If you are running
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the ingestion from Airflow already, you **DON'T NEED**  to install the `airflow`  plugin.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Instead, just run `pip3 install "openmetadata-ingestion"` .
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /note %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Note:** We only support Airflow versions that are officially supported by the Apache Airflow project. You can check the version list [here](https://airflow.apache.org/docs/apache-airflow/stable/installation/supported-versions.html).
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## Metadata Ingestion
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								All connectors are defined as JSON Schemas.
							 
						 
					
						
							
								
									
										
										
										
											2025-03-10 12:25:59 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								[Here ](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/pipeline/airflowConnection.json )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								you can find the structure to create a connection to Airflow.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In order to create and run a Metadata Ingestion workflow, we will follow
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the steps to create a YAML configuration able to connect to the source,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								process the Entities if needed, and reach the OpenMetadata server.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The workflow is modeled around the following
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[JSON Schema ](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### 1. Define the YAML Config
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-03-10 12:25:59 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								This is a sample config for Airflow:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codePreview %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codeInfoContainer %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								#### Source Configuration - Service Connection
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codeInfo srNumber=1 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**connection**: Airflow metadata database connection. See
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  these [docs ](https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  for supported backends.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In terms of `connection`  we support the following selections:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `backend` : Should not be used from the UI. This is only applicable when ingesting Airflow metadata locally by running 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  the ingestion from a DAG. It will use the current Airflow SQLAlchemy connection to extract the data.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `MySQL` , `Postgres` , and `SQLite` : Pass the required credentials to reach each of these services. We will 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  create a connection to the specified database and read Airflow data from there.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**hostPort**: URL to the Airflow instance.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codeInfo %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codeInfo srNumber=2 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**numberOfStatus**: Number of past statuses to look back at in every ingestion (e.g., past executions of a DAG).
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codeInfo %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codeInfo srNumber=3 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**connection**: Airflow metadata database connection. See
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  these [docs ](https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  for supported backends.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In terms of `connection`  we support the following selections:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `backend` : Should not be used from the UI. This is only applicable when ingesting Airflow metadata locally by running 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  the ingestion from a DAG. It will use the current Airflow SQLAlchemy connection to extract the data.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `MySQL` , `Postgres` , and `SQLite` : Pass the required credentials to reach each of these services. We will 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  create a connection to the specified database and read Airflow data from there.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codeInfo %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/pipeline/source-config-def.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/ingestion-sink-def.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/workflow-config-def.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codeInfoContainer %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% codeBlock fileName="filename.yaml" %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```yaml {% isCodeBlock=true %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								source:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  type: airflow
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  serviceName: airflow_source
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  serviceConnection:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    config:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      type: Airflow
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```yaml {% srNumber=1 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      hostPort: http://localhost:8080
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```yaml {% srNumber=2 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      numberOfStatus: 10
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```yaml {% srNumber=3 %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      # Connection needs to be one of Mysql, Postgres or Sqlite
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      connection:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        type: Mysql
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        username: airflow_user
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        authType:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								          password: airflow_pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        databaseSchema: airflow_db
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        hostPort: localhost:3306
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # #
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # type: Postgres
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # username: airflow_user
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # authType:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        #   password: airflow_pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # database: airflow_db
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # hostPort: localhost:5432
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # #
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # type: Sqlite
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # username: airflow_user
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # password: airflow_pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # database: airflow_db
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # hostPort: localhost:3306
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        # databaseMode: ":memory:" (optional)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/pipeline/source-config.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/ingestion-sink.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/workflow-config.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codeBlock %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{% /codePreview %}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 12:22:38 +05:30 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								{% partial file="/v1.9/connectors/yaml/ingestion-cli.md" /%}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-18 15:53:06 +02:00