From c6acce99068deb755874fed10449940bbbd2bc66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 1 Apr 2025 11:12:47 +0200 Subject: [PATCH] feat(powerbi): capture dataset report lineage (#12993) --- .../ingestion/source/powerbi/powerbi.py | 15 ++- .../powerbi/golden_test_admin_only.json | 42 ++++++++ .../powerbi/golden_test_container.json | 98 +++++++++++++++++++ .../powerbi/golden_test_report.json | 98 +++++++++++++++++++ 4 files changed, 252 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 0b1d37ebe6..a54f599aca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -94,7 +94,7 @@ from datahub.metadata.schema_classes import ( UpstreamLineageClass, ViewPropertiesClass, ) -from datahub.metadata.urns import ChartUrn +from datahub.metadata.urns import ChartUrn, DatasetUrn from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo from datahub.utilities.dedup_list import deduplicate_list from datahub.utilities.urns.urn_iter import lowercase_dataset_urn @@ -1083,6 +1083,7 @@ class Mapper: report: powerbi_data_classes.Report, chart_mcps: List[MetadataChangeProposalWrapper], user_mcps: List[MetadataChangeProposalWrapper], + dataset_edges: List[EdgeClass], ) -> List[MetadataChangeProposalWrapper]: """ Map PowerBi report to Datahub dashboard @@ -1104,6 +1105,7 @@ class Mapper: charts=chart_urn_list, lastModified=ChangeAuditStamps(), dashboardUrl=report.webUrl, + datasetEdges=dataset_edges, ) info_mcp = self.new_mcp( @@ -1197,12 +1199,23 @@ class Mapper: ds_mcps = self.to_datahub_dataset(report.dataset, workspace) chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps) + # collect all upstream datasets; using a set to retain unique urns + dataset_urns = { + dataset.entityUrn + for dataset in ds_mcps + if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn + } + dataset_edges = [ + EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns + ] + # Let's convert report to datahub dashboard report_mcps = self.report_to_dashboard( workspace=workspace, report=report, chart_mcps=chart_mcps, user_mcps=user_mcps, + dataset_edges=dataset_edges, ) # Now add MCPs in sequence diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json index 6d3c090c05..243e2e01b4 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_admin_only.json @@ -2524,6 +2524,48 @@ "aspectName": "dashboardInfo", "aspect": { "json": [ + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + } + }, { "op": "add", "path": "/title", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json index 93da56fdde..302c9bd291 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_container.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_container.json @@ -3458,6 +3458,55 @@ "aspectName": "dashboardInfo", "aspect": { "json": [ + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + } + }, { "op": "add", "path": "/title", @@ -4595,6 +4644,55 @@ "aspectName": "dashboardInfo", "aspect": { "json": [ + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + } + }, { "op": "add", "path": "/title", diff --git a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json index af775dc5fa..ed8252d893 100644 --- a/metadata-ingestion/tests/integration/powerbi/golden_test_report.json +++ b/metadata-ingestion/tests/integration/powerbi/golden_test_report.json @@ -2503,6 +2503,55 @@ "aspectName": "dashboardInfo", "aspect": { "json": [ + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + } + }, { "op": "add", "path": "/title", @@ -3425,6 +3474,55 @@ "aspectName": "dashboardInfo", "aspect": { "json": [ + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)" + } + }, + { + "op": "add", + "path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)", + "value": { + "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)" + } + }, { "op": "add", "path": "/title",