feat(powerbi): capture dataset report lineage (#12993)

This commit is contained in:
Sergio Gómez Villamor 2025-04-01 11:12:47 +02:00 committed by GitHub
parent ebea3b7ca3
commit c6acce9906
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 252 additions and 1 deletions

View File

@ -94,7 +94,7 @@ from datahub.metadata.schema_classes import (
UpstreamLineageClass,
ViewPropertiesClass,
)
from datahub.metadata.urns import ChartUrn
from datahub.metadata.urns import ChartUrn, DatasetUrn
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
from datahub.utilities.dedup_list import deduplicate_list
from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
@ -1083,6 +1083,7 @@ class Mapper:
report: powerbi_data_classes.Report,
chart_mcps: List[MetadataChangeProposalWrapper],
user_mcps: List[MetadataChangeProposalWrapper],
dataset_edges: List[EdgeClass],
) -> List[MetadataChangeProposalWrapper]:
"""
Map a Power BI report to a DataHub dashboard
@ -1104,6 +1105,7 @@ class Mapper:
charts=chart_urn_list,
lastModified=ChangeAuditStamps(),
dashboardUrl=report.webUrl,
datasetEdges=dataset_edges,
)
info_mcp = self.new_mcp(
@ -1197,12 +1199,23 @@ class Mapper:
ds_mcps = self.to_datahub_dataset(report.dataset, workspace)
chart_mcps = self.pages_to_chart(report.pages, workspace, ds_mcps)
# Collect the URNs of all upstream datasets; a set de-duplicates them (note: the resulting edge order is not guaranteed)
dataset_urns = {
dataset.entityUrn
for dataset in ds_mcps
if dataset.entityType == DatasetUrn.ENTITY_TYPE and dataset.entityUrn
}
dataset_edges = [
EdgeClass(destinationUrn=dataset_urn) for dataset_urn in dataset_urns
]
# Convert the report to a DataHub dashboard
report_mcps = self.report_to_dashboard(
workspace=workspace,
report=report,
chart_mcps=chart_mcps,
user_mcps=user_mcps,
dataset_edges=dataset_edges,
)
# Now add MCPs in sequence

View File

@ -2524,6 +2524,48 @@
"aspectName": "dashboardInfo",
"aspect": {
"json": [
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
}
},
{
"op": "add",
"path": "/title",

View File

@ -3458,6 +3458,55 @@
"aspectName": "dashboardInfo",
"aspect": {
"json": [
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
}
},
{
"op": "add",
"path": "/title",
@ -4595,6 +4644,55 @@
"aspectName": "dashboardInfo",
"aspect": {
"json": [
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
}
},
{
"op": "add",
"path": "/title",

View File

@ -2503,6 +2503,55 @@
"aspectName": "dashboardInfo",
"aspect": {
"json": [
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
}
},
{
"op": "add",
"path": "/title",
@ -3425,6 +3474,55 @@
"aspectName": "dashboardInfo",
"aspect": {
"json": [
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query-with-join,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.public_issue_history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.job-history,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.postgres_test_table,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.snowflake_native-query,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.SNOWFLAKE_TESTTABLE,DEV)"
}
},
{
"op": "add",
"path": "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)",
"value": {
"destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:powerbi,library-dataset.big-query-with-parameter,DEV)"
}
},
{
"op": "add",
"path": "/title",