fix(ingest/powerbi): fix broken lineage between chart and dataset (#11080)

This commit is contained in:
sid-acryl 2024-08-07 09:32:21 +05:30 committed by GitHub
parent 543e447787
commit 2755cf3559
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 833 additions and 816 deletions

View File

@ -89,6 +89,7 @@ from datahub.metadata.schema_classes import (
from datahub.metadata.urns import ChartUrn
from datahub.sql_parsing.sqlglot_lineage import ColumnLineageInfo
from datahub.utilities.dedup_list import deduplicate_list
from datahub.utilities.urns.urn_iter import lowercase_dataset_urn
# Logger instance
logger = logging.getLogger(__name__)
@ -127,7 +128,7 @@ class Mapper:
@staticmethod
def urn_to_lowercase(value: str, flag: bool) -> str:
if flag is True:
return value.lower()
return lowercase_dataset_urn(value)
return value
@ -390,11 +391,13 @@ class Mapper:
for table in dataset.tables:
# Create a URN for dataset
ds_urn = builder.make_dataset_urn_with_platform_instance(
platform=self.__config.platform_name,
name=self.assets_urn_to_lowercase(table.full_name),
platform_instance=self.__config.platform_instance,
env=self.__config.env,
ds_urn = self.assets_urn_to_lowercase(
builder.make_dataset_urn_with_platform_instance(
platform=self.__config.platform_name,
name=table.full_name,
platform_instance=self.__config.platform_instance,
env=self.__config.env,
)
)
logger.debug(f"dataset_urn={ds_urn}")

View File

@ -131,7 +131,7 @@ def _modify_at_path(
_modify_at_path(getattr(model, path[0]), path[1:], new_value)
def _lowercase_dataset_urn(dataset_urn: str) -> str:
def lowercase_dataset_urn(dataset_urn: str) -> str:
cur_urn = DatasetUrn.from_string(dataset_urn)
new_urn = DatasetUrn(
platform=cur_urn.platform, name=cur_urn.name.lower(), env=cur_urn.env
@ -149,10 +149,10 @@ def lowercase_dataset_urns(
) -> None:
def modify_urn(urn: str) -> str:
if guess_entity_type(urn) == "dataset":
return _lowercase_dataset_urn(urn)
return lowercase_dataset_urn(urn)
elif guess_entity_type(urn) == "schemaField":
cur_urn = Urn.from_string(urn)
cur_urn._entity_ids[0] = _lowercase_dataset_urn(cur_urn._entity_ids[0])
cur_urn._entity_ids[0] = lowercase_dataset_urn(cur_urn._entity_ids[0])
return str(cur_urn)
return urn

View File

@ -819,6 +819,8 @@ def test_powerbi_ingest_urn_lower_case(
"type": "powerbi",
"config": {
**default_source_config(),
"env": "PROD",
"platform_instance": "myPlatformInstance",
"convert_urns_to_lowercase": True,
"convert_lineage_urns_to_lowercase": True,
},