diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 5722b329c7..200ac4111a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -379,6 +379,14 @@ class ExploreUpstreamViewField: : -(len(self.field.field_group_variant.lower()) + 1) ] + # Validate that field_name is not empty to prevent invalid schema field URNs + if not field_name or not field_name.strip(): + logger.warning( + f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. " + f"Skipping field to prevent invalid schema field URN generation." + ) + return None + assert view_name # for lint false positive project_include: ProjectInclude = ProjectInclude( @@ -1351,7 +1359,25 @@ class LookerExplore: fine_grained_lineages = [] if config.extract_column_level_lineage: for field in self.fields or []: + # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs + if not field.name or not field.name.strip(): + logger.warning( + f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'" + ) + continue + for upstream_column_ref in field.upstream_fields: + # Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs + if ( + not upstream_column_ref.column + or not upstream_column_ref.column.strip() + ): + logger.warning( + f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' " + f"due to empty upstream column name in table '{upstream_column_ref.table}'" + ) + continue + fine_grained_lineages.append( FineGrainedLineageClass( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, diff --git a/metadata-ingestion/tests/unit/looker/__init__.py b/metadata-ingestion/tests/unit/looker/__init__.py new file mode 100644 index 
0000000000..e69de29bb2
diff --git a/metadata-ingestion/tests/unit/looker/test_looker_common.py b/metadata-ingestion/tests/unit/looker/test_looker_common.py
new file mode 100644
index 0000000000..8ea6d5f6ed
--- /dev/null
+++ b/metadata-ingestion/tests/unit/looker/test_looker_common.py
@@ -0,0 +1,77 @@
+import logging
+from unittest.mock import MagicMock
+
+import pytest
+from looker_sdk.sdk.api40.models import LookmlModelExplore, LookmlModelExploreField
+
+from datahub.ingestion.source.looker.looker_common import ExploreUpstreamViewField
+from datahub.ingestion.source.looker.looker_config import LookerCommonConfig
+
+
+def _make_upstream_field(name, variant=None):
+    """Build the field under test inside explore ``test_explore``."""
+    looker_field = LookmlModelExploreField(
+        name=name,
+        type="string",
+        original_view=None,
+        field_group_variant=variant,
+    )
+    return ExploreUpstreamViewField(
+        field=looker_field,
+        explore=LookmlModelExplore(name="test_explore"),
+    )
+
+
+def _call_kwargs(**extra):
+    """Keyword arguments common to every ``_form_field_name`` call."""
+    return dict(
+        view_project_map={},
+        explore_project_name="test_project",
+        model_name="test_model",
+        upstream_views_file_path={},
+        config=MagicMock(spec=LookerCommonConfig),
+        **extra,
+    )
+
+
+class TestExploreUpstreamViewFieldFormFieldName:
+    """``_form_field_name`` handling of blank or malformed names."""
+
+    @pytest.mark.parametrize(
+        "field_name",
+        [
+            "test_view.",  # Empty after dot
+            "test_view. ",  # Whitespace after dot
+        ],
+    )
+    def test_returns_none_for_empty_field_name(self, field_name, caplog):
+        """A blank part after the dot gives ``None`` and a logged warning."""
+        upstream_field = _make_upstream_field(field_name)
+        kwargs = _call_kwargs()
+
+        with caplog.at_level(logging.WARNING):
+            outcome = upstream_field._form_field_name(**kwargs)
+
+        assert outcome is None
+        for expected in ("Empty field name detected", field_name, "test_explore"):
+            assert expected in caplog.text
+
+    def test_returns_none_for_invalid_field_format(self):
+        """A name lacking the ``view.field`` dot separator gives ``None``."""
+        upstream_field = _make_upstream_field("just_field_name")
+
+        assert upstream_field._form_field_name(**_call_kwargs()) is None
+
+    def test_variant_removal_causing_empty_name(self, caplog):
+        """Variant removal that empties the name gives ``None``."""
+        # Pathological dimension-group case: the part after the dot ("month")
+        # is exactly the field_group_variant, so removing the "_month" suffix
+        # with remove_variant=True leaves an empty field name.
+        upstream_field = _make_upstream_field("test_view.month", variant="month")
+        kwargs = _call_kwargs(remove_variant=True)
+
+        with caplog.at_level(logging.WARNING):
+            outcome = upstream_field._form_field_name(**kwargs)
+
+        assert outcome is None
+        assert "Empty field name detected" in caplog.text