#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
This DAG can be used directly in your Airflow instance after installing
the `openmetadata-ingestion[airflow-container]` package. Its purpose is to
connect to the underlying database, retrieve the pipeline metadata and
push it to OpenMetadata.
"""
from datetime import timedelta

import yaml
from airflow import DAG

try:
    # Airflow 2.x import path
    from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
    # Fall back to the Airflow 1.x import path
    from airflow.operators.python_operator import PythonOperator

from airflow.utils.dates import days_ago

from metadata.ingestion.api.workflow import Workflow

default_args = {
    "owner": "user_name",
    "email": ["username@org.com"],
    "email_on_failure": False,
    "retries": 3,
    "retry_delay": timedelta(minutes=5),
    "execution_timeout": timedelta(minutes=60),
}

# Ingestion recipe: the Airflow source connects to the scheduler's backend
# database, and the metadata-rest sink pushes the results to the
# OpenMetadata server API.
config = """
source:
  type: airflow
  serviceName: airflow_source
  serviceConnection:
    config:
      type: Airflow
      hostPort: http://localhost:8080
      numberOfStatus: 10
      connection:
        type: Backend
  sourceConfig:
    config:
      type: PipelineMetadata
sink:
  type: metadata-rest
  config: {}
workflowConfig:
  loggerLevel: INFO
  openMetadataServerConfig:
    hostPort: http://localhost:8585/api
    authProvider: no-auth
"""


def metadata_ingestion_workflow():
    """
    Load the YAML recipe, build the Workflow, and run the full ingestion
    cycle: execute, raise on any failed status, print a summary and stop.
    """
    workflow_config = yaml.safe_load(config)
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()


with DAG(
    "airflow_metadata_extraction",
    default_args=default_args,
    description="An example DAG which pushes Airflow data to OM",
    start_date=days_ago(1),
    is_paused_upon_creation=True,
    schedule_interval="*/5 * * * *",
    catchup=False,
) as dag:
    ingest_task = PythonOperator(
        task_id="ingest_using_recipe",
        python_callable=metadata_ingestion_workflow,
    )
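

# ---------------------------------------------------------------------------
# Optional variant: a minimal sketch of keeping the recipe in a standalone
# YAML file instead of the inline `config` string above, assuming you store
# recipes alongside your DAGs. The path below is hypothetical and not part of
# the openmetadata-ingestion package; adjust it to your deployment. To use
# it, point the PythonOperator's `python_callable` at this function instead.
# ---------------------------------------------------------------------------
def metadata_ingestion_workflow_from_file():
    """Same ingestion cycle as above, but reading the recipe from disk."""
    # Hypothetical recipe location; replace with your actual path.
    with open("/opt/airflow/dags/recipes/airflow_metadata.yaml") as recipe:
        workflow_config = yaml.safe_load(recipe)
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()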