From 9105241bfd1822c0b9d43b45b34fc4fd9c16241e Mon Sep 17 00:00:00 2001 From: Benjamin Maquet Date: Mon, 8 Sep 2025 09:35:43 +0200 Subject: [PATCH] feat(superset/preset): add dataset and column description (#14426) --- .../src/datahub/ingestion/source/superset.py | 5 +- .../superset/golden_test_stateful_ingest.json | 182 +++++++++--------- 2 files changed, 94 insertions(+), 93 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index d0ae289188..0adb28c1ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -154,6 +154,7 @@ class SupersetDataset(BaseModel): table_name: str changed_on_utc: Optional[str] = None explore_url: Optional[str] = "" + description: Optional[str] = "" @property def modified_dt(self) -> Optional[datetime]: @@ -1062,7 +1063,7 @@ class SupersetSource(StatefulIngestionSourceBase): fieldPath=col.get("column_name", ""), type=SchemaFieldDataType(data_type), nativeDataType="", - description=col.get("column_name", ""), + description=col.get("description") or col.get("column_name", ""), nullable=True, ) schema_fields.append(field) @@ -1283,7 +1284,7 @@ class SupersetSource(StatefulIngestionSourceBase): dataset_info = DatasetPropertiesClass( name=dataset.table_name, - description="", + description=dataset.description or "", externalUrl=dataset_url, lastModified=TimeStamp(time=modified_ts), ) diff --git a/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json b/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json index 8cf2eb5b2a..2fb45c5608 100644 --- a/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json +++ b/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json @@ -179,7 +179,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -364,7 +364,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -549,7 +549,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -734,7 +734,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -919,7 +919,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1104,7 +1104,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1289,7 +1289,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1474,7 +1474,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1659,7 +1659,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1692,7 +1692,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1705,7 +1705,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -1748,7 +1748,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -1852,7 +1852,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -1885,7 +1885,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -1898,7 +1898,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -1941,7 +1941,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -2045,7 +2045,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -2078,7 +2078,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2091,7 +2091,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -2134,7 +2134,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -2238,7 +2238,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -2271,7 +2271,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2284,7 +2284,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -2327,7 +2327,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -2431,7 +2431,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -2464,7 +2464,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2477,7 +2477,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -2520,7 +2520,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -2624,7 +2624,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -2657,7 +2657,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2670,7 +2670,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -2713,7 +2713,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -2817,7 +2817,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -2850,7 +2850,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -2863,7 +2863,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -2906,7 +2906,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -3010,7 +3010,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3043,7 +3043,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -3056,7 +3056,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -3099,7 +3099,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -3203,7 +3203,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3236,7 +3236,7 @@ { "fieldPath": "test_column3", "nullable": true, - "description": "test_column3", + "description": "some description 3", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -3249,7 +3249,7 @@ { "fieldPath": "test_column4", "nullable": true, - "description": "test_column4", + "description": "some description 4", "type": { "type": { "com.linkedin.pegasus2avro.schema.TimeType": {} @@ -3292,7 +3292,7 @@ "customProperties": {}, "externalUrl": "mock://mock-domain.superset.com/explore/?datasource_type=table&datasource_id=2", "name": "Test Table 2", - "description": "", + "description": "Sample description for dataset 2", "lastModified": { "time": 1707579020123 }, @@ -3396,7 +3396,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3446,7 +3446,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3504,7 +3504,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3570,7 +3570,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3628,7 +3628,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3686,7 +3686,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3756,7 +3756,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3773,7 +3773,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3790,7 +3790,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3807,7 +3807,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3824,41 +3824,7 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", - "lastRunId": "no-run-id-provided", - "pipelineName": "test_pipeline" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:superset,test_database1.test_schema1.Test Table 1,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", - "lastRunId": "no-run-id-provided", - "pipelineName": "test_pipeline" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(superset,2)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": true - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3875,7 +3841,24 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", + "lastRunId": "no-run-id-provided", + "pipelineName": "test_pipeline" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:superset,test_database1.test_schema1.Test Table 1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" } @@ -3892,7 +3875,24 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-2020_04_14-07_00_00-wap353", + "runId": "superset-2020_04_14-07_00_00-lbqsmm", + "lastRunId": "no-run-id-provided", + "pipelineName": "test_pipeline" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(superset,2)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "superset-2020_04_14-07_00_00-lbqsmm", "lastRunId": "no-run-id-provided", "pipelineName": "test_pipeline" }