Suman Maharana 743ff85ee4
WIP #23073 : Pipeline Observability (#23341)
* init pipeline-profiler cli command

* Fixed issues with the bulk sink

* Update generated TypeScript types

* fix and remove unnecessary code blocks

* fix and remove unnecessary code blocks

* Added get observ data by pipeline id api

* Added APIs for metrics and charts

* remove fallback mechanism

* Build fixes

* mvn build fixes

* Api remove unnecessary changes

* Fix Metrics API

* Fix trends API

* Fixed filtering

* Added sample data

* Added more sample data

* Move to metadata workflow

* removed unused files

* remove unnecessary files

* json2ts

* change to debug logs

* remove pipeline profiler helpers

* Update generated TypeScript types

* Update generated TypeScript types

* created PipelineExecutionIndex

* Fix limit param for pagination

* Update generated TypeScript types

* addressed comments

* linting

* fix sample_data

* Added serviceType in api response

* Add endtime in sample data

* Update generated TypeScript types

* Addressed comments

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Harshit Shah <harshit.shah@getcollate.io>
2025-11-25 08:21:23 +05:30


{
"pipelines": [{
"name": "presto_etl",
"displayName": "Presto ETL",
"description": "Presto ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=presto_etl",
"scheduleInterval": "* * * * *",
"tasks": [
{
"name": "presto_task",
"displayName": "Presto Task",
"description": "Airflow operator to perform ETL on presto tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_address_etl",
"displayName": "dim_address etl",
"description": "dim_address ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_address_etl",
"scheduleInterval": "5 * * * *",
"tasks": [{
"name": "dim_address_task",
"displayName": "dim_address Task",
"description": "Airflow operator to perform ETL and generate dim_address table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=dim_address_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_user_etl",
"displayName": "dim_user etl",
"description": "dim_user ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_user_etl",
"tasks": [{
"name": "dim_user_task",
"displayName": "dim_user Task",
"description": "Airflow operator to perform ETL and generate dim_user table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=dim_user_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_location_etl",
"displayName": "dim_location etl",
"description": "diim_location ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_address_etl",
"tasks": [{
"name": "dim_location_task",
"displayName": "dim_location Task",
"description": "Airflow operator to perform ETL and generate dim_location table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=dim_location_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_product_etl",
"displayName": "dim_product etl",
"description": "diim_product ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_address_etl",
"tasks": [{
"name": "dim_product_task",
"displayName": "dim_product Task",
"description": "Airflow operator to perform ETL and generate dim_product table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=dim_product_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "trino_etl",
"displayName": "Trino ETL",
"description": "Trino ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=trino_etl",
"scheduleInterval": "@once",
"tasks": [{
"name": "trino_task",
"displayName": "Trino Task",
"description": "Airflow operator to perform ETL on trino tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "TrinoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "hive_etl",
"displayName": "Hive ETL",
"description": "Hive ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=hive_etl",
"tasks": [{
"name": "hive_create_table",
"displayName": "Hive Create Table",
"description": "Hive Create Table Task",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=hive_create_table",
"downstreamTasks": ["assert_table_exits"],
"taskType": "HiveOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "snowflake_etl",
"displayName": "Snowflake ETL",
"description": "Snowflake ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=snowflake_etl",
"tasks": [{
"name": "snowflake_task",
"displayName": "Snowflake Task",
"description": "Airflow operator to perform ETL on snowflake tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "SnowflakeOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "real_time_metrics",
"displayName": "Real-time Metrics Aggregation",
"description": "Real-time metrics aggregation pipeline running every 15 minutes",
"sourceUrl": "http://localhost:8080/tree?dag_id=real_time_metrics",
"scheduleInterval": "*/15 * * * *",
"tasks": [{
"name": "metrics_aggregation_task",
"displayName": "Metrics Aggregation Task",
"description": "Airflow operator to aggregate streaming metrics",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=metrics_aggregation_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PythonOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "ml_feature_pipeline",
"displayName": "ML Feature Engineering Pipeline",
"description": "Machine learning feature engineering pipeline running twice daily",
"sourceUrl": "http://localhost:8080/tree?dag_id=ml_feature_pipeline",
"scheduleInterval": "0 2,14 * * *",
"tasks": [{
"name": "feature_engineering_task",
"displayName": "Feature Engineering Task",
"description": "Airflow operator to generate ML features from order data",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=feature_engineering_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PythonOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?flt1_dag_id_equals=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "dbt_staging_shopify",
"displayName": "DBT Staging Shopify",
"description": "DBT pipeline for staging shopify raw data - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"scheduleInterval": "0 1 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_staging",
"displayName": "DBT Run Staging Models",
"description": "Execute dbt run for staging models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"downstreamTasks": ["dbt_test_staging"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_staging",
"displayName": "DBT Test Staging Models",
"description": "Execute dbt tests for staging models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_transform_orders",
"displayName": "DBT Transform Orders",
"description": "DBT pipeline for transforming order data into fact tables - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"scheduleInterval": "0 3 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_orders",
"displayName": "DBT Run Order Transformations",
"description": "Execute dbt run for order fact tables",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"downstreamTasks": ["dbt_test_orders"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_orders",
"displayName": "DBT Test Order Models",
"description": "Execute dbt tests for order models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_analytics_customers",
"displayName": "DBT Customer Analytics",
"description": "DBT pipeline for customer analytics and aggregations - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_analytics_customers",
"scheduleInterval": "0 4 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_build_analytics",
"displayName": "DBT Build Customer Analytics",
"description": "Execute dbt build for customer analytics models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_analytics_customers",
"downstreamTasks": [],
"taskType": "dbtBuildTask"
}]
},
{
"name": "dbt_snapshot_inventory",
"displayName": "DBT Snapshot Inventory",
"description": "DBT snapshot pipeline for tracking inventory changes - runs hourly",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_snapshot_inventory",
"scheduleInterval": "0 * * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_snapshot_task",
"displayName": "DBT Snapshot",
"description": "Execute dbt snapshot for inventory tracking",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_snapshot_inventory",
"downstreamTasks": [],
"taskType": "dbtSnapshotTask"
}]
},
{
"name": "dbt_test_data_quality",
"displayName": "DBT Data Quality Tests",
"description": "DBT data quality testing pipeline - runs 4 times daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_test_data_quality",
"scheduleInterval": "0 */6 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_test_all",
"displayName": "DBT Test All Models",
"description": "Execute comprehensive dbt tests across all models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_test_data_quality",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_ml_features",
"displayName": "DBT ML Feature Generation",
"description": "DBT pipeline for generating ML features - runs twice daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"scheduleInterval": "0 2,14 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_ml_features",
"displayName": "DBT Run ML Features",
"description": "Execute dbt run for ML feature generation",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"downstreamTasks": ["dbt_test_ml_features"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_ml_features",
"displayName": "DBT Test ML Features",
"description": "Execute dbt tests for ML feature models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
}
]
}
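
For orientation, here is a minimal TypeScript sketch of the shape this sample file follows, plus a sanity check that every downstreamTasks entry resolves to a task defined in the same pipeline. The field names are read directly off the JSON above; the SampleData/Pipeline/Task type names and the findDanglingReferences helper are illustrative only, not the project's generated types.

// Shape of the sample-data file above (assumed types, inferred from the JSON;
// scheduleInterval and service are optional because some pipelines omit them).
interface Task {
  name: string;
  displayName: string;
  description: string;
  sourceUrl: string;
  downstreamTasks: string[];
  taskType: string;
}

interface Pipeline {
  name: string;
  displayName: string;
  description: string;
  sourceUrl: string;
  scheduleInterval?: string; // cron expression or a preset such as "@once"
  service?: string;          // e.g. "sample_dbtcloud"; absent for the Airflow pipelines
  tasks: Task[];
}

interface SampleData {
  pipelines: Pipeline[];
}

// Report any downstreamTasks entry that does not name a task in the same
// pipeline; a dangling reference would break the task DAG built from this data.
function findDanglingReferences(data: SampleData): string[] {
  const problems: string[] = [];
  for (const pipeline of data.pipelines) {
    const known = new Set(pipeline.tasks.map((t) => t.name));
    for (const task of pipeline.tasks) {
      for (const ref of task.downstreamTasks) {
        if (!known.has(ref)) {
          problems.push(`${pipeline.name}/${task.name} -> ${ref}`);
        }
      }
    }
  }
  return problems;
}

// Usage sketch: parse the file contents and expect an empty result.
// const issues = findDanglingReferences(JSON.parse(raw) as SampleData);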