From a8aa1d004fbf848a7de3d3648e012d197ef5cdbe Mon Sep 17 00:00:00 2001 From: harshsoni2024 <64592571+harshsoni2024@users.noreply.github.com> Date: Tue, 27 May 2025 16:50:55 +0530 Subject: [PATCH] MINOR: PBI dataset expressions empty value fix (#21409) (cherry picked from commit 8bbc4d8c3d7792d0d36a163f119efdccf61840e5) --- .../source/dashboard/powerbi/metadata.py | 10 ++- .../source/dashboard/powerbi/models.py | 4 +- .../unit/topology/dashboard/test_powerbi.py | 67 ++++++++++++++++++- 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py index d14dbb644db..b8462116061 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/metadata.py @@ -706,6 +706,12 @@ class PowerbiSource(DashboardServiceSource): if dataset and dataset.expressions: # find keyword from dataset expressions for dexpression in dataset.expressions: + if not dexpression.expression: + logger.debug( + f"No expression value found inside dataset" + f"({dataset.name}) expressions' name={dexpression.name}" + ) + continue if dexpression.name == match.group(2): pattern = r'DefaultValue="([^"]+)"' kw_match = re.search(pattern, dexpression.expression) @@ -784,7 +790,9 @@ class PowerbiSource(DashboardServiceSource): if not isinstance(table.source, list): return {} source_expression = table.source[0].expression - + if not source_expression: + logger.debug(f"No source expression found for table: {table.name}") + return {} # parse snowflake source table_info = self._parse_snowflake_source( source_expression, datamodel_entity diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/models.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/models.py index 3e0fc9e4664..e988b6a3fd7 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/models.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/models.py @@ -143,7 +143,7 @@ class PowerBITableSource(BaseModel): PowerBI Table Source """ - expression: str + expression: Optional[str] = None class PowerBiTable(BaseModel): @@ -171,7 +171,7 @@ class TablesResponse(BaseModel): class DatasetExpression(BaseModel): name: str - expression: str + expression: Optional[str] = None class Dataset(BaseModel): diff --git a/ingestion/tests/unit/topology/dashboard/test_powerbi.py b/ingestion/tests/unit/topology/dashboard/test_powerbi.py index 7c3897a3027..d2c01e28810 100644 --- a/ingestion/tests/unit/topology/dashboard/test_powerbi.py +++ b/ingestion/tests/unit/topology/dashboard/test_powerbi.py @@ -15,7 +15,12 @@ from metadata.generated.schema.type.entityReference import EntityReference from metadata.generated.schema.type.entityReferenceList import EntityReferenceList from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.dashboard.powerbi.metadata import PowerbiSource -from metadata.ingestion.source.dashboard.powerbi.models import Dataset, PowerBIDashboard +from metadata.ingestion.source.dashboard.powerbi.models import ( + Dataset, + PowerBIDashboard, + PowerBiTable, + PowerBITableSource, +) MOCK_REDSHIFT_EXP = """ let @@ -167,6 +172,19 @@ MOCK_DATASET_FROM_WORKSPACE = Dataset( }, ], ) +MOCK_DATASET_FROM_WORKSPACE_V2 = Dataset( + id="testdataset", + name="Test Dataset", + tables=[], + expressions=[ + { + "name": "DB", + }, + { + "name": "Schema", + }, + ], +) MOCK_DASHBOARD_DATA_MODEL = DashboardDataModel( name="dummy_datamodel", id=uuid.uuid4(), @@ -292,3 +310,50 @@ class PowerBIUnitTest(TestCase): # Verify get_reference_by_email was not called when there are no owners self.powerbi.metadata.get_reference_by_email.assert_not_called() + + @pytest.mark.order(3) + def test_parse_table_info_from_source_exp(self): + table = PowerBiTable( + name="test_table", + source=[PowerBITableSource(expression=MOCK_REDSHIFT_EXP)], + ) + result = self.powerbi._parse_table_info_from_source_exp( + table, MOCK_DASHBOARD_DATA_MODEL + ) + self.assertEqual(result, EXPECTED_REDSHIFT_RESULT) + + # no source expression + table = PowerBiTable( + name="test_table", + source=[PowerBITableSource(expression=None)], + ) + result = self.powerbi._parse_table_info_from_source_exp( + table, MOCK_DASHBOARD_DATA_MODEL + ) + self.assertEqual(result, {}) + + # no source + table = PowerBiTable( + name="test_table", + source=[], + ) + result = self.powerbi._parse_table_info_from_source_exp( + table, MOCK_DASHBOARD_DATA_MODEL + ) + self.assertEqual(result, {}) + + @pytest.mark.order(4) + @patch.object( + PowerbiSource, + "_fetch_dataset_from_workspace", + return_value=MOCK_DATASET_FROM_WORKSPACE_V2, + ) + def test_parse_dataset_expressions(self, *_): + # test with valid snowflake source but no + # dataset expression value + result = self.powerbi._parse_snowflake_source( + MOCK_SNOWFLAKE_EXP_V2, MOCK_DASHBOARD_DATA_MODEL + ) + self.assertIsNone(result["database"]) + self.assertIsNone(result["schema"]) + self.assertEqual(result["table"], "CUSTOMER_TABLE")