mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-27 18:45:50 +00:00
fix(ingest/looker): Ignore fields with empty column name to not generate invalid field urns (#14338)
This commit is contained in:
parent
be252bb0c7
commit
f1e7ce72f0
@ -379,6 +379,14 @@ class ExploreUpstreamViewField:
|
||||
: -(len(self.field.field_group_variant.lower()) + 1)
|
||||
]
|
||||
|
||||
# Validate that field_name is not empty to prevent invalid schema field URNs
|
||||
if not field_name or not field_name.strip():
|
||||
logger.warning(
|
||||
f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
|
||||
f"Skipping field to prevent invalid schema field URN generation."
|
||||
)
|
||||
return None
|
||||
|
||||
assert view_name # for lint false positive
|
||||
|
||||
project_include: ProjectInclude = ProjectInclude(
|
||||
@ -1351,7 +1359,25 @@ class LookerExplore:
|
||||
fine_grained_lineages = []
|
||||
if config.extract_column_level_lineage:
|
||||
for field in self.fields or []:
|
||||
# Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
|
||||
if not field.name or not field.name.strip():
|
||||
logger.warning(
|
||||
f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
|
||||
)
|
||||
continue
|
||||
|
||||
for upstream_column_ref in field.upstream_fields:
|
||||
# Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
|
||||
if (
|
||||
not upstream_column_ref.column
|
||||
or not upstream_column_ref.column.strip()
|
||||
):
|
||||
logger.warning(
|
||||
f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
|
||||
f"due to empty upstream column name in table '{upstream_column_ref.table}'"
|
||||
)
|
||||
continue
|
||||
|
||||
fine_grained_lineages.append(
|
||||
FineGrainedLineageClass(
|
||||
upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
|
||||
|
0
metadata-ingestion/tests/unit/looker/__init__.py
Normal file
0
metadata-ingestion/tests/unit/looker/__init__.py
Normal file
93
metadata-ingestion/tests/unit/looker/test_looker_common.py
Normal file
93
metadata-ingestion/tests/unit/looker/test_looker_common.py
Normal file
@ -0,0 +1,93 @@
|
||||
import logging
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from looker_sdk.sdk.api40.models import LookmlModelExplore, LookmlModelExploreField
|
||||
|
||||
from datahub.ingestion.source.looker.looker_common import ExploreUpstreamViewField
|
||||
from datahub.ingestion.source.looker.looker_config import LookerCommonConfig
|
||||
|
||||
|
||||
class TestExploreUpstreamViewFieldFormFieldName:
    """Exercise how ``_form_field_name`` reacts to unusable field names."""

    @staticmethod
    def _build_upstream_field(name, variant=None):
        """Wrap a string-typed explore field called *name* for 'test_explore'."""
        parent_explore = LookmlModelExplore(name="test_explore")
        explore_field = LookmlModelExploreField(
            name=name,
            type="string",
            original_view=None,
            field_group_variant=variant,
        )
        return ExploreUpstreamViewField(field=explore_field, explore=parent_explore)

    @staticmethod
    def _resolve(upstream_field, **extra_kwargs):
        """Invoke ``_form_field_name`` with the arguments every test shares.

        ``extra_kwargs`` is forwarded untouched so a test can opt in to
        ``remove_variant`` while the others keep the method's default.
        """
        return upstream_field._form_field_name(
            view_project_map={},
            explore_project_name="test_project",
            model_name="test_model",
            upstream_views_file_path={},
            config=MagicMock(spec=LookerCommonConfig),
            **extra_kwargs,
        )

    @pytest.mark.parametrize(
        "field_name",
        [
            "test_view.",  # Empty after dot
            "test_view. ",  # Whitespace after dot
        ],
    )
    def test_returns_none_for_empty_field_name(self, field_name, caplog):
        """A blank column part yields ``None`` and a warning naming the field and explore."""
        candidate = self._build_upstream_field(field_name)

        with caplog.at_level(logging.WARNING):
            outcome = self._resolve(candidate)

        assert outcome is None
        for expected_fragment in (
            "Empty field name detected",
            field_name,
            "test_explore",
        ):
            assert expected_fragment in caplog.text

    def test_returns_none_for_invalid_field_format(self):
        """A name without the ``view.field`` dot separator yields ``None``."""
        candidate = self._build_upstream_field("just_field_name")  # No dot separator

        outcome = self._resolve(candidate)

        assert outcome is None

    def test_variant_removal_causing_empty_name(self, caplog):
        """Stripping the group variant down to an empty name yields ``None``."""
        # Pathological dimension-group case:
        #   - the field is "test_view.month", so the part after the dot is "month"
        #   - field_group_variant is also "month"
        #   - with remove_variant=True the "_month" suffix (variant length + 1
        #     characters) is cut from "month", which leaves an empty string
        # The method is expected to log a warning and return None instead of
        # producing a field with an empty name.
        candidate = self._build_upstream_field("test_view.month", variant="month")

        with caplog.at_level(logging.WARNING):
            outcome = self._resolve(candidate, remove_variant=True)

        assert outcome is None
        assert "Empty field name detected" in caplog.text
|
Loading…
x
Reference in New Issue
Block a user