import os
import time

from datahub.api.entities.datajob import DataFlow
from datahub.api.entities.dataprocess.dataprocess_instance import (
    DataProcessInstance,
    InstanceRunResult,
)
from datahub.emitter.rest_emitter import DatahubRestEmitter

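# The GMS endpoint and token are read from the environment; the token may be
# unset for deployments that don't require authentication.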
emitter = DatahubRestEmitter(
    gms_server=os.getenv("DATAHUB_GMS_URL", "http://localhost:8080"),
    token=os.getenv("DATAHUB_GMS_TOKEN"),
)

# Define the DataFlow (Airflow DAG)
dataflow = DataFlow(
    orchestrator="airflow",
    id="daily_reporting_pipeline",
    env="prod",
    description="Daily reporting pipeline that aggregates metrics",
)

# Create a DataProcessInstance for a specific DAG run
dag_run_instance = DataProcessInstance.from_dataflow(
    dataflow=dataflow, id="scheduled__2024-01-15T00:00:00+00:00"
)
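# Using Airflow's run_id as the instance id gives every DAG run its own
# stable, distinct DataProcessInstance URN.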

# Set properties specific to this DAG run
dag_run_instance.properties = {
    "execution_date": "2024-01-15",
    "run_type": "scheduled",
    "external_trigger": "false",
}
dag_run_instance.url = "https://airflow.company.com/dags/daily_reporting_pipeline/grid?dag_run_id=scheduled__2024-01-15T00:00:00+00:00"
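# The url is surfaced on the run in DataHub as a link back to the Airflow UI.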

# Track DAG run start
start_time = int(time.time() * 1000)
dag_run_instance.emit_process_start(
    emitter=emitter,
    start_timestamp_millis=start_time,
    attempt=1,
    emit_template=True,
    materialize_iolets=True,  # pre-create any declared inlets/outlets in DataHub
)
print(f"DAG run started: {dag_run_instance.urn}")

# Simulate DAG execution
time.sleep(3)

# Track DAG run completion
end_time = int(time.time() * 1000)
dag_run_instance.emit_process_end(
    emitter=emitter,
    end_timestamp_millis=end_time,
    result=InstanceRunResult.SUCCESS,
    result_type="airflow",  # the orchestrator-native result type
    attempt=1,
    start_timestamp_millis=start_time,
)
print(f"DAG run completed successfully in {end_time - start_time}ms")
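
# A failed or retried run is reported the same way, just with a different
# result. Minimal sketch (kept commented out so this example runs end to end):
#
# dag_run_instance.emit_process_end(
#     emitter=emitter,
#     end_timestamp_millis=int(time.time() * 1000),
#     result=InstanceRunResult.FAILURE,
#     result_type="airflow",
#     attempt=1,
# )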