mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-03 20:19:31 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			98 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			98 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
 | 
						|
import json
 | 
						|
from datetime import timedelta
 | 
						|
 | 
						|
from airflow import DAG
 | 
						|
 | 
						|
try:
 | 
						|
    from airflow.operators.python import PythonOperator
 | 
						|
except ModuleNotFoundError:
 | 
						|
    from airflow.operators.python_operator import PythonOperator
 | 
						|
 | 
						|
from airflow.utils.dates import days_ago
 | 
						|
 | 
						|
from metadata.ingestion.api.workflow import Workflow
 | 
						|
 | 
						|
# Arguments passed to the DAG below as ``default_args``.
# The owner/email values are placeholders to be replaced per deployment.
default_args = {
    "owner": "user_name",
    "email": ["username@org.com"],
    "email_on_failure": False,
    # Retry a failed task up to 3 times, waiting 10 seconds between attempts.
    "retries": 3,
    "retry_delay": timedelta(seconds=10),
    # Upper bound on a single task run.
    "execution_timeout": timedelta(minutes=60),
}
 | 
						|
 | 
						|
# JSON recipe for the ingestion workflow; it is parsed with ``json.loads``
# inside ``metadata_ingestion_workflow``. The stage and bulkSink sections
# point at the same file (/tmp/sample_usage).
config = """
{
  "source": {
    "type": "sample-usage",
    "serviceName": "sample_data",
    "serviceConnection": {
      "config": {
        "type": "SampleData",
        "sampleDataFolder": "./examples/sample_data"
      }
    },
    "sourceConfig": {}
  },
  "processor": {
    "type": "query-parser",
    "config": {
      "filter": ""
    }
  },
  "stage": {
    "type": "table-usage",
    "config": {
      "filename": "/tmp/sample_usage"
    }
  },
  "bulkSink": {
    "type": "metadata-usage",
    "config": {
      "filename": "/tmp/sample_usage"
    }
  },
  "workflowConfig": {
    "openMetadataServerConfig": {
      "hostPort": "http://localhost:8585/api",
      "authProvider": "no-auth"
    }
  }
}
"""
 | 
						|
 | 
						|
 | 
						|
def metadata_ingestion_workflow():
    """Run the ingestion workflow described by the module-level ``config``.

    Parses the JSON recipe, builds a ``Workflow`` from it, executes it, calls
    ``raise_from_status()`` (presumably raising when the run recorded
    failures -- confirm against the Workflow API), and prints the status.

    ``workflow.stop()` is always called once the workflow has been created,
    including when execution or the status check raises, so a failed run
    does not skip the workflow's shutdown step.
    """
    workflow_config = json.loads(config)
    workflow = Workflow.create(workflow_config)
    try:
        workflow.execute()
        workflow.raise_from_status()
        workflow.print_status()
    finally:
        # Run the shutdown step on both the success and the failure path.
        workflow.stop()
 | 
						|
 | 
						|
 | 
						|
# DAG definition: a single PythonOperator task that runs the ingestion
# workflow defined above.
with DAG(
    "sample_usage",
    default_args=default_args,
    description="An example DAG which runs a OpenMetadata ingestion workflow",
    # Scheduled with a one-day interval, starting from one day ago.
    schedule_interval=timedelta(days=1),
    start_date=days_ago(1),
    # The DAG is registered paused and does not backfill missed intervals.
    is_paused_upon_creation=True,
    catchup=False,
) as dag:
    ingest_task = PythonOperator(
        task_id="ingest_using_recipe",
        python_callable=metadata_ingestion_workflow,
    )
 |