from ast import literal_eval
from datetime import datetime
from typing import List
from unittest.mock import MagicMock

import pytest

from metadata.data_quality.validations.base_test_handler import BaseTestValidator
from metadata.generated.schema.tests.basic import (
    DimensionResult,
    TestCaseResult,
    TestCaseStatus,
)
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
from metadata.utils.logger import test_suite_logger

logger = test_suite_logger()


@pytest.mark.parametrize(
    "param_values, name, type_, default, expected",
    [
        ([TestCaseParameterValue(name="str", value="test")], "str", str, None, "test"),
        (
            [TestCaseParameterValue(name="param", value="[1, 2, 3]")],
            "param",
            literal_eval,
            None,
            [1, 2, 3],
        ),
        ([TestCaseParameterValue(name="param", value="123")], "param", int, None, 123),
        (
            [TestCaseParameterValue(name="param", value=None)],
            "param",
            str,
            "default",
            "default",
        ),
    ],
)
def test_get_test_case_param_value(param_values, name, type_, default, expected):
    result = BaseTestValidator.get_test_case_param_value(
        param_values, name, type_, default
    )
    assert result == expected
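
# Note: passing ``literal_eval`` as ``type_`` above lets string-encoded
# parameter values such as "[1, 2, 3]" be parsed safely into Python objects:
#     literal_eval("[1, 2, 3]")  # -> [1, 2, 3]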


class MockTestValidator(BaseTestValidator):
    """Mock validator for testing the dimensional validation functionality"""

    def _run_validation(self) -> TestCaseResult:
        """Mock implementation of _run_validation"""
        # Create a mock TestCaseResult to avoid complex schema validation issues
        mock_result = MagicMock(spec=TestCaseResult)
        mock_result.testCaseStatus = TestCaseStatus.Success
        mock_result.dimensionResults = []
        return mock_result

    def _run_dimensional_validation(self) -> List["DimensionResult"]:
        """Execute dimensional validation for this test.

        This method should implement the dimensional logic specific to each
        test type. It is called automatically by the template method when
        dimensionColumns are configured on the test case.

        Returns:
            List[DimensionResult]: List of dimension-specific test results
        """
        # Default implementation returns an empty list
        return []

    def _get_column_name(self):
        """Mock implementation of _get_column_name"""
        return None
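
# For reference, a concrete validator would typically override
# _run_dimensional_validation to emit one DimensionResult per dimension value.
# A minimal, hypothetical sketch (the get_dimension_result_object keyword
# arguments mirror the calls exercised in the tests below):
#
#     def _run_dimensional_validation(self) -> List["DimensionResult"]:
#         return [
#             self.get_dimension_result_object(
#                 dimension_values={"region": "US"},
#                 test_case_status=TestCaseStatus.Success,
#                 result="Passed: 80, Failed: 20",
#                 test_result_value=[],
#                 total_rows=100,
#                 passed_rows=80,
#                 failed_rows=20,
#             )
#         ]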


class TestBaseTestValidator:
    """Test class for BaseTestValidator"""

    @pytest.fixture
    def mock_test_case(self):
        """Create a mock test case"""
        test_case = MagicMock(spec=TestCase)
        test_case.name = "test_case"
        test_case.fullyQualifiedName = "test.test_case"
        return test_case

    @pytest.fixture
    def mock_execution_date(self):
        """Create a mock execution date"""
        return datetime.now()

    @pytest.fixture
    def validator(self, mock_test_case, mock_execution_date):
        """Create a validator instance for testing"""
        runner = MagicMock()
        return MockTestValidator(runner, mock_test_case, mock_execution_date)

    @pytest.mark.parametrize(
        "dimension_columns,expected",
        [
            (None, False),
            ([], False),
            (["col1"], True),
            (["col1", "col2"], True),
            (["dimension_col"], True),
        ],
    )
    def test_is_dimensional_test(
        self, validator, mock_test_case, dimension_columns, expected
    ):
        """Test is_dimensional_test method with various dimension column configurations"""
        # Set up the test case with dimension columns
        mock_test_case.dimensionColumns = dimension_columns

        result = validator.is_dimensional_test()
        assert result == expected
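
    # The cases above imply that is_dimensional_test() is truthiness-based
    # (an inference from the parametrized expectations, not from the
    # implementation itself): it returns False for both None and an empty
    # list, and True for any non-empty list of dimension columns.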

    @pytest.mark.parametrize(
        "has_dimensions,expected_dimension_results",
        [
            (False, []),
            (True, []),
        ],
    )
    def test_run_validation_flow(
        self, validator, mock_test_case, has_dimensions, expected_dimension_results
    ):
        """Test the run_validation template method flow"""
        # Set up dimension columns
        mock_test_case.dimensionColumns = ["col1", "col2"] if has_dimensions else None

        # Mock the dimensional validation to return the expected results
        if has_dimensions:
            validator._run_dimensional_validation = MagicMock(
                return_value=expected_dimension_results
            )

        # Execute the template method
        result = validator.run_validation()

        # Verify the result structure
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success

        # Verify dimensional results are set correctly
        if has_dimensions:
            assert result.dimensionResults == expected_dimension_results
            # Verify that _run_dimensional_validation was called
            validator._run_dimensional_validation.assert_called_once()
        else:
            assert result.dimensionResults is None or result.dimensionResults == []
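
    # As exercised above, run_validation() behaves as a template method: it
    # always delegates to _run_validation(), and only calls
    # _run_dimensional_validation() (attaching the returned DimensionResults)
    # when dimension columns are configured on the test case.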

    @pytest.mark.parametrize(
        "dimension_values,passed_rows,failed_rows,total_rows,expected_percentages",
        [
            ({"region": "US", "category": "A"}, 80, 20, 100, (80.0, 20.0)),
            ({"region": "EU", "category": "B"}, 50, 50, 100, (50.0, 50.0)),
            ({"region": "ASIA"}, 0, 100, 100, (0.0, 100.0)),
            ({"region": "US"}, 100, 0, 100, (100.0, 0.0)),
            ({"region": "EU"}, 25, 75, 100, (25.0, 75.0)),
        ],
    )
    def test_get_dimension_result_object(
        self,
        validator,
        dimension_values,
        passed_rows,
        failed_rows,
        total_rows,
        expected_percentages,
    ):
        """Test get_dimension_result_object helper method with various scenarios"""
        # Call the helper method
        result = validator.get_dimension_result_object(
            dimension_values=dimension_values,
            test_case_status=TestCaseStatus.Success,
            result=f"Passed: {passed_rows}, Failed: {failed_rows}",
            test_result_value=[],
            total_rows=total_rows,
            passed_rows=passed_rows,
            failed_rows=failed_rows,
        )

        # Verify the result structure
        assert isinstance(result, DimensionResult)
        assert result.dimensionValues == dimension_values
        assert result.passedRows == passed_rows
        assert result.failedRows == failed_rows

        # Verify the percentage calculations
        expected_passed_pct, expected_failed_pct = expected_percentages
        assert result.passedRowsPercentage == expected_passed_pct
        assert result.failedRowsPercentage == expected_failed_pct

        # Verify that the percentages add up to 100% (allowing for floating
        # point error)
        passed_pct = result.passedRowsPercentage or 0.0
        failed_pct = result.failedRowsPercentage or 0.0
        assert abs(passed_pct + failed_pct - 100.0) < 0.01

    @pytest.mark.parametrize(
        "total_rows,expected_percentages",
        [
            (0, (0.0, 0.0)),  # Edge case: no rows
            (1, (0.0, 100.0)),  # Edge case: single row (0 passed, 1 failed)
            (1000, (75.0, 25.0)),  # Normal case
        ],
    )
    def test_get_dimension_result_object_edge_cases(
        self, validator, total_rows, expected_percentages
    ):
        """Test get_dimension_result_object with edge cases"""
        dimension_values = {"test": "value"}
        passed_rows = int(total_rows * 0.75) if total_rows > 0 else 0
        failed_rows = total_rows - passed_rows

        result = validator.get_dimension_result_object(
            dimension_values=dimension_values,
            test_case_status=TestCaseStatus.Success,
            result=f"Passed: {passed_rows}, Failed: {failed_rows}",
            test_result_value=[],
            total_rows=total_rows,
            passed_rows=passed_rows,
            failed_rows=failed_rows,
        )

        expected_passed_pct, expected_failed_pct = expected_percentages
        assert result.passedRowsPercentage == expected_passed_pct
        assert result.failedRowsPercentage == expected_failed_pct
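
    # The expected values in the two tests above imply the percentage math
    # used by the helper (an inference from the assertions, not a quote of
    # the implementation):
    #     passedRowsPercentage = passed_rows / total_rows * 100
    #     failedRowsPercentage = failed_rows / total_rows * 100
    # with both falling back to 0.0 when total_rows == 0.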

    def test_run_validation_calls_abstract_methods(self, validator, mock_test_case):
        """Test that run_validation properly calls the abstract methods"""
        # Mock the abstract methods
        validator._run_validation = MagicMock(
            return_value=MagicMock(spec=TestCaseResult)
        )
        validator._run_dimensional_validation = MagicMock(return_value=[])

        # Set up the test case with dimensions
        mock_test_case.dimensionColumns = ["col1"]

        # Execute
        validator.run_validation()

        # Verify that both abstract methods were called
        validator._run_validation.assert_called_once()
        validator._run_dimensional_validation.assert_called_once()

    def test_run_validation_no_dimensions_skips_dimensional_validation(
        self, validator, mock_test_case
    ):
        """Test that run_validation skips dimensional validation when no dimensions are set"""
        # Mock the abstract methods
        validator._run_validation = MagicMock(
            return_value=MagicMock(spec=TestCaseResult)
        )
        validator._run_dimensional_validation = MagicMock(return_value=[])

        # Set up the test case without dimensions
        mock_test_case.dimensionColumns = None

        # Execute
        validator.run_validation()

        # Verify that only _run_validation was called
        validator._run_validation.assert_called_once()
        validator._run_dimensional_validation.assert_not_called()