From 03ede24ebac29500d27a6b6cdb93c46d7cb9da22 Mon Sep 17 00:00:00 2001 From: Pablo Takara Date: Thu, 25 Sep 2025 10:40:44 +0200 Subject: [PATCH] Removed useless method to use the one we already had --- .../validations/base_test_handler.py | 35 +++++++++++-------- .../column/base/columnValuesToBeInSet.py | 14 -------- .../column/base/columnValuesToBeUnique.py | 17 --------- .../validations/test_base_handler.py | 20 +++++------ .../test_suite/test_validations_databases.py | 18 +--------- 5 files changed, 32 insertions(+), 72 deletions(-) diff --git a/ingestion/src/metadata/data_quality/validations/base_test_handler.py b/ingestion/src/metadata/data_quality/validations/base_test_handler.py index 133ea2b9236..98e3a28ae6d 100644 --- a/ingestion/src/metadata/data_quality/validations/base_test_handler.py +++ b/ingestion/src/metadata/data_quality/validations/base_test_handler.py @@ -106,9 +106,7 @@ class BaseTestValidator(ABC): logger.debug(f"Dimension columns: {self.test_case.dimensionColumns}") # Validate dimension columns exist in the target table - validation_error = self.validate_dimension_columns() - if validation_error: - logger.warning(f"Dimensional validation skipped: {validation_error}") + if not self.are_dimension_columns_valid(): # Don't abort the main test, just skip dimensional validation # The main test result is still valid return test_result @@ -278,35 +276,44 @@ class BaseTestValidator(ABC): return test_case_dimension_results - def validate_dimension_columns(self) -> Optional[str]: + def are_dimension_columns_valid(self) -> bool: """Validate that all dimension columns exist in the target table Returns: - Optional[str]: Error message if validation fails, None if successful + bool: True if all dimension columns are valid, False otherwise """ if not self.is_dimensional_test(): - return None + return False # No dimensions to validate + + if not hasattr(self, "_get_column_name"): + logger.warning("Validator does not support dimensional column validation") + return False try: missing_columns = [] for dim_col in self.test_case.dimensionColumns: try: - # Delegate to child classes via get_dimension_column method - # which uses the _get_column_name(column_name) pattern - self.get_dimension_column(dim_col) + self._get_column_name(dim_col) # type: ignore[attr-defined] - Delegates to child class except ValueError: missing_columns.append(dim_col) - except AttributeError: + except NotImplementedError: # Child class doesn't support dimensional validation yet - return f"Validator does not support dimensional column validation" + logger.warning( + "Validator does not support dimensional column validation" + ) + return False if missing_columns: - return f"Dimension columns not found in table: {', '.join(missing_columns)}" + logger.warning( + f"Dimensional validation skipped: Dimension columns not found in table: {', '.join(missing_columns)}" + ) + return False - return None + return True except Exception as exc: - return f"Unable to validate dimension columns: {exc}" + logger.warning(f"Unable to validate dimension columns: {exc}") + return False def get_dimension_result_object( self, diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py index f8ee48afe42..48adb6e85fb 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py @@ -215,17 +215,3 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator): Tuple[int, int]: """ return self.compute_row_count(self._get_column_name()) - - def get_dimension_column(self, column_name: str): - """Get dimension column object by name for validation - - Args: - column_name: Name of the dimension column to retrieve - - Returns: - Column object (type depends on implementation) - - Raises: - ValueError: If column is not found in the data source - """ - return self._get_column_name(column_name) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py index f4ed87a8b6c..cefb1289691 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py @@ -204,20 +204,3 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator): Example: [DimensionResult(dimensionValues={"country": "Spain"}, ...), DimensionResult(dimensionValues={"country": "Argentina"}, ...)] """ raise NotImplementedError - - def get_dimension_column(self, column_name: str): - """Get column object for dimension validation - - This method is called by the base class to validate that dimension columns exist. - It delegates to the existing _get_column_name method that child classes already implement. - - Args: - column_name: Name of the dimension column - - Returns: - Column object for the dimension column - - Raises: - ValueError: If column doesn't exist - """ - return self._get_column_name(column_name) diff --git a/ingestion/tests/unit/data_quality/validations/test_base_handler.py b/ingestion/tests/unit/data_quality/validations/test_base_handler.py index e1037f5b7e1..24681996f51 100644 --- a/ingestion/tests/unit/data_quality/validations/test_base_handler.py +++ b/ingestion/tests/unit/data_quality/validations/test_base_handler.py @@ -72,18 +72,18 @@ class MockTestValidator(BaseTestValidator): # Default implementation returns empty list return [] - def _get_column_name(self): + def _get_column_name(self, column_name=None): """Mock implementation of _get_column_name""" - return None + # For testing purposes, accept any column name that's provided + # This simulates that all dimension columns exist + if column_name: + # Return a mock column for dimension columns + from unittest.mock import MagicMock - def get_dimension_column(self, column_name: str): - """Mock implementation of get_dimension_column for dimensional validation""" - # Return a mock column object - from unittest.mock import MagicMock - - mock_column = MagicMock() - mock_column.name = column_name - return mock_column + mock_column = MagicMock() + mock_column.name = column_name + return mock_column + return None # Return None for the main column (backward compatibility) class TestBaseTestValidator: diff --git a/ingestion/tests/unit/test_suite/test_validations_databases.py b/ingestion/tests/unit/test_suite/test_validations_databases.py index 1e46fde356d..fcbe7d9c44c 100644 --- a/ingestion/tests/unit/test_suite/test_validations_databases.py +++ b/ingestion/tests/unit/test_suite/test_validations_databases.py @@ -712,11 +712,6 @@ def test_column_values_to_be_in_set_dimensional_validation(create_sqlite_table): mock_get_column.side_effect = mock_get_column_side_effect - # Also mock get_dimension_column for dimensional validation - test_handler.get_dimension_column = MagicMock( - side_effect=mock_get_column_side_effect - ) - res = test_handler.run_validation() # Verify main test result @@ -800,9 +795,7 @@ def test_column_values_to_be_in_set_invalid_dimension_column(create_sqlite_table # Mock column resolution to raise ValueError for invalid column with patch.object(test_handler, "_run_results", return_value=23), patch.object( test_handler, "_get_column_name" - ) as mock_get_column, patch.object( - test_handler, "get_dimension_column" - ) as mock_get_dimension_column: + ) as mock_get_column: # Mock main column resolution from unittest.mock import MagicMock @@ -814,15 +807,6 @@ def test_column_values_to_be_in_set_invalid_dimension_column(create_sqlite_table mock_main_column.type = String() mock_get_column.return_value = mock_main_column - # Mock dimension column resolution to raise error for invalid column - def mock_get_dimension_column_side_effect(column_name): - if column_name == "invalid_column": - raise ValueError(f"Column {column_name} not found in table") - # This shouldn't be called for other columns in this test - raise AssertionError(f"Unexpected column: {column_name}") - - mock_get_dimension_column.side_effect = mock_get_dimension_column_side_effect - res = test_handler.run_validation() # Main test should still succeed even when dimension columns are invalid