mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-27 09:55:36 +00:00
385 lines
15 KiB
Python
385 lines
15 KiB
Python
from ast import literal_eval
|
|
from datetime import datetime
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
from metadata.data_quality.validations.base_test_handler import BaseTestValidator
|
|
from metadata.generated.schema.tests.basic import (
|
|
DimensionValue,
|
|
TestCaseDimensionResult,
|
|
TestCaseResult,
|
|
TestCaseStatus,
|
|
)
|
|
from metadata.generated.schema.tests.dimensionResult import DimensionResult
|
|
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
|
|
from metadata.utils.logger import test_suite_logger
|
|
|
|
# Module-level logger obtained from the test-suite logger factory.
# NOTE(review): nothing in the visible part of this file references `logger` —
# confirm it is still needed.
logger = test_suite_logger()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "param_values, name, type_, default, expected",
    [
        # String value read back through ``str``.
        pytest.param(
            [TestCaseParameterValue(name="str", value="test")],
            "str",
            str,
            None,
            "test",
        ),
        # List literal stored as text and parsed with ``literal_eval``.
        pytest.param(
            [TestCaseParameterValue(name="param", value="[1, 2, 3]")],
            "param",
            literal_eval,
            None,
            [1, 2, 3],
        ),
        # Numeric text converted with ``int``.
        pytest.param(
            [TestCaseParameterValue(name="param", value="123")],
            "param",
            int,
            None,
            123,
        ),
        # Parameter present but its value is None: the supplied default is expected.
        pytest.param(
            [TestCaseParameterValue(name="param", value=None)],
            "param",
            str,
            "default",
            "default",
        ),
    ],
)
def test_get_test_case_param_value(param_values, name, type_, default, expected):
    """Check BaseTestValidator.get_test_case_param_value against each parametrized case.

    Args:
        param_values: list of ``TestCaseParameterValue`` handed to the lookup.
        name: name of the parameter to look up.
        type_: callable passed as the conversion argument.
        default: fallback value passed to the lookup.
        expected: value the lookup must return.
    """
    assert (
        BaseTestValidator.get_test_case_param_value(param_values, name, type_, default)
        == expected
    )
|
|
|
|
|
|
class MockTestValidator(BaseTestValidator):
    """Minimal concrete ``BaseTestValidator`` used to exercise the base-class logic.

    Each hook returns a fixed, predictable value so the tests can focus on what
    the base class does around them.
    """

    def _run_validation(self) -> TestCaseResult:
        """Return a successful ``TestCaseResult`` stamped with the execution date.

        A real ``TestCaseResult`` (not a mock) is returned so that the base
        class can modify it afterwards.
        """
        return TestCaseResult(
            timestamp=self.execution_date,
            testCaseStatus=TestCaseStatus.Success,
            result="Test passed",
            testResultValue=[],
        )

    def _run_dimensional_validation(self) -> list:
        """Return an empty list of dimension results.

        This is the default stub; individual tests replace this attribute with
        a ``MagicMock`` when they need specific results or an exception.

        Returns:
            list: always ``[]``.
        """
        return []

    def _get_column_name(self, column_name=None):
        """Return a stand-in column for any truthy ``column_name``, otherwise ``None``.

        Args:
            column_name: name of the requested column. Any truthy value is
                accepted, which simulates that every dimension column exists.

        Returns:
            A ``MagicMock`` whose ``name`` attribute equals ``column_name``, or
            ``None`` when no column name is given (the main-column case).
        """
        if not column_name:
            # Main column lookup: nothing to return (backward compatibility).
            return None

        # MagicMock comes from the module-level import; no local re-import needed.
        mock_column = MagicMock()
        mock_column.name = column_name
        return mock_column
|
|
|
|
|
|
class TestBaseTestValidator:
    """Tests for the dimensional-validation behaviour exposed by BaseTestValidator.

    All tests run against a ``MockTestValidator`` built from a mocked runner,
    a ``MagicMock(spec=TestCase)`` and an integer execution timestamp.
    """

    @pytest.fixture
    def mock_test_case(self):
        """Return a ``MagicMock`` specced on ``TestCase`` with name and FQN set."""
        test_case = MagicMock(spec=TestCase)
        test_case.name = "test_case"
        test_case.fullyQualifiedName = "test.test_case"
        return test_case

    @pytest.fixture
    def mock_execution_date(self):
        """Return the current time as an integer epoch timestamp (seconds)."""
        return int(datetime.now().timestamp())

    @pytest.fixture
    def validator(self, mock_test_case, mock_execution_date):
        """Return a ``MockTestValidator`` wired to a mocked runner and test case."""
        runner = MagicMock()
        return MockTestValidator(runner, mock_test_case, mock_execution_date)

    @staticmethod
    def _make_dimension_result(
        dimension_values, status, passed_rows, failed_rows, passed_pct, failed_pct
    ):
        """Build a ``MagicMock(spec=DimensionResult)`` with every field the tests set.

        Args:
            dimension_values: mapping of dimension name to value; converted, in
                insertion order, to ``DimensionValue`` objects.
            status: ``TestCaseStatus`` to store on the mock.
            passed_rows: value for ``passedRows``.
            failed_rows: value for ``failedRows``.
            passed_pct: value for ``passedRowsPercentage``.
            failed_pct: value for ``failedRowsPercentage``.

        Returns:
            The configured mock, with ``testResultValue`` set to ``[]`` and
            ``impactScore`` set to ``None``.
        """
        dimension_result = MagicMock(spec=DimensionResult)
        dimension_result.dimensionValues = [
            DimensionValue(name=dim_name, value=dim_value)
            for dim_name, dim_value in dimension_values.items()
        ]
        dimension_result.testCaseStatus = status
        dimension_result.passedRows = passed_rows
        dimension_result.failedRows = failed_rows
        dimension_result.passedRowsPercentage = passed_pct
        dimension_result.failedRowsPercentage = failed_pct
        dimension_result.result = f"Passed: {passed_rows}, Failed: {failed_rows}"
        dimension_result.testResultValue = []
        dimension_result.impactScore = None
        return dimension_result

    @pytest.mark.parametrize(
        "dimension_columns,expected",
        [
            (None, False),
            ([], False),
            (["col1"], True),
            (["col1", "col2"], True),
            (["dimension_col"], True),
        ],
    )
    def test_is_dimensional_test(
        self, validator, mock_test_case, dimension_columns, expected
    ):
        """Test is_dimensional_test method with various dimension column configurations"""
        # Set up the test case with dimension columns
        mock_test_case.dimensionColumns = dimension_columns

        result = validator.is_dimensional_test()
        assert result == expected

    @pytest.mark.parametrize(
        "dimension_values,passed_rows,failed_rows,total_rows,expected_percentages",
        [
            ({"region": "US", "category": "A"}, 80, 20, 100, (80.0, 20.0)),
            ({"region": "EU", "category": "B"}, 50, 50, 100, (50.0, 50.0)),
            ({"region": "ASIA"}, 0, 100, 100, (0.0, 100.0)),
            ({"region": "US"}, 100, 0, 100, (100.0, 0.0)),
            ({"region": "EU"}, 25, 75, 100, (25.0, 75.0)),
        ],
    )
    def test_get_dimension_result_object(
        self,
        validator,
        dimension_values,
        passed_rows,
        failed_rows,
        total_rows,
        expected_percentages,
    ):
        """Test get_dimension_result_object helper method with various scenarios"""
        # Call the helper method
        result = validator.get_dimension_result_object(
            dimension_values=dimension_values,
            test_case_status=TestCaseStatus.Success,
            result=f"Passed: {passed_rows}, Failed: {failed_rows}",
            test_result_value=[],
            total_rows=total_rows,
            passed_rows=passed_rows,
            failed_rows=failed_rows,
        )

        # Verify the result structure
        assert isinstance(result, DimensionResult)

        # Verify dimension values were converted to DimensionValue objects
        assert len(result.dimensionValues) == len(dimension_values)
        for dim_val in result.dimensionValues:
            assert isinstance(dim_val, DimensionValue)
            assert dimension_values[dim_val.name] == dim_val.value

        assert result.passedRows == passed_rows
        assert result.failedRows == failed_rows

        # Verify percentage calculations. The values are computed floats, so
        # compare with a tolerance instead of exact equality.
        expected_passed_pct, expected_failed_pct = expected_percentages
        assert result.passedRowsPercentage == pytest.approx(expected_passed_pct)
        assert result.failedRowsPercentage == pytest.approx(expected_failed_pct)

        # Verify that percentages add up to 100% (or close to it due to floating point)
        passed_pct = result.passedRowsPercentage or 0.0
        failed_pct = result.failedRowsPercentage or 0.0
        assert abs(passed_pct + failed_pct - 100.0) < 0.01

    @pytest.mark.parametrize(
        "total_rows,expected_percentages",
        [
            (0, (0.0, 0.0)),  # Edge case: no rows
            (1, (0.0, 100.0)),  # Edge case: single row (0 passed, 1 failed)
            (1000, (75.0, 25.0)),  # Normal case
        ],
    )
    def test_get_dimension_result_object_edge_cases(
        self, validator, total_rows, expected_percentages
    ):
        """Test get_dimension_result_object with edge cases"""
        dimension_values = {"test": "value"}
        # 75% of the rows pass, truncated to an int; the remainder fail.
        passed_rows = int(total_rows * 0.75) if total_rows > 0 else 0
        failed_rows = total_rows - passed_rows

        result = validator.get_dimension_result_object(
            dimension_values=dimension_values,
            test_case_status=TestCaseStatus.Success,
            result=f"Passed: {passed_rows}, Failed: {failed_rows}",
            test_result_value=[],
            total_rows=total_rows,
            passed_rows=passed_rows,
            failed_rows=failed_rows,
        )

        # Computed floats: compare with a tolerance instead of exact equality.
        expected_passed_pct, expected_failed_pct = expected_percentages
        assert result.passedRowsPercentage == pytest.approx(expected_passed_pct)
        assert result.failedRowsPercentage == pytest.approx(expected_failed_pct)

    @pytest.mark.parametrize(
        "dimension_columns,test_description",
        [
            (None, "no dimensions configured"),
            ([], "empty dimensions list"),
        ],
    )
    def test_run_validation_no_dimensions_skip_dimensional(
        self, validator, mock_test_case, dimension_columns, test_description
    ):
        """Test: When no dimensions are configured, dimensional validation should not run"""
        # Setup: Set dimension columns
        mock_test_case.dimensionColumns = dimension_columns

        # Mock _run_dimensional_validation to track if it's called
        validator._run_dimensional_validation = MagicMock(return_value=[])

        # Execute
        result = validator.run_validation()

        # Verify
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success
        assert result.dimensionResults is None

        # Verify dimensional validation was NOT called
        validator._run_dimensional_validation.assert_not_called()

    def test_run_validation_dimensions_configured_no_results(
        self, validator, mock_test_case
    ):
        """Test: When dimensions configured but returns empty results, dimensionResults should be None"""
        # Setup: Configure dimension columns
        mock_test_case.dimensionColumns = ["region", "category"]

        # Mock _run_dimensional_validation to return empty list
        validator._run_dimensional_validation = MagicMock(return_value=[])

        # Execute
        result = validator.run_validation()

        # Verify
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success

        # When dimensional validation returns empty list, dimensionResults remains None
        assert result.dimensionResults is None

        # Verify dimensional validation WAS called
        validator._run_dimensional_validation.assert_called_once()

    def test_run_validation_dimensions_configured_with_results(
        self, validator, mock_test_case
    ):
        """Test: When dimensions configured and returns results, dimensionResults should contain them"""
        # Setup: Configure dimension columns
        mock_test_case.dimensionColumns = ["region", "category"]

        # Create mock DimensionResult objects with all required fields
        mock_dimension_result_1 = self._make_dimension_result(
            {"region": "US", "category": "A"},
            TestCaseStatus.Success,
            passed_rows=80,
            failed_rows=20,
            passed_pct=80.0,
            failed_pct=20.0,
        )
        mock_dimension_result_2 = self._make_dimension_result(
            {"region": "EU", "category": "B"},
            TestCaseStatus.Failed,
            passed_rows=50,
            failed_rows=50,
            passed_pct=50.0,
            failed_pct=50.0,
        )

        # Mock _run_dimensional_validation to return DimensionResult objects
        validator._run_dimensional_validation = MagicMock(
            return_value=[mock_dimension_result_1, mock_dimension_result_2]
        )

        # Execute
        result = validator.run_validation()

        # Verify
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success

        # When dimensional validation returns results, they should be converted to TestCaseDimensionResult
        assert result.dimensionResults is not None
        assert len(result.dimensionResults) == 2

        # Verify the dimension results are TestCaseDimensionResult instances
        for dim_result in result.dimensionResults:
            assert isinstance(dim_result, TestCaseDimensionResult)

        # Verify the first dimension result has correct values
        first_result = result.dimensionResults[0]
        assert first_result.dimensionKey == "region=US,category=A"
        assert first_result.testCaseStatus == TestCaseStatus.Success
        assert first_result.passedRows == 80
        assert first_result.failedRows == 20

        # Verify the second dimension result has correct values
        second_result = result.dimensionResults[1]
        assert second_result.dimensionKey == "region=EU,category=B"
        assert second_result.testCaseStatus == TestCaseStatus.Failed
        assert second_result.passedRows == 50
        assert second_result.failedRows == 50

        # Verify dimensional validation WAS called
        validator._run_dimensional_validation.assert_called_once()

    def test_run_validation_dimensional_not_implemented(
        self, validator, mock_test_case
    ):
        """Test: When dimensional validation raises NotImplementedError, main test still succeeds"""
        # Setup: Configure dimension columns
        mock_test_case.dimensionColumns = ["region"]

        # Mock _run_dimensional_validation to raise NotImplementedError
        validator._run_dimensional_validation = MagicMock(
            side_effect=NotImplementedError("Dimensional validation not implemented")
        )

        # Execute
        result = validator.run_validation()

        # Verify: Main test should still succeed despite NotImplementedError
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success
        # No dimension results due to NotImplementedError
        assert result.dimensionResults is None

        # Verify dimensional validation WAS attempted
        validator._run_dimensional_validation.assert_called_once()

    def test_run_validation_dimensional_raises_exception(
        self, validator, mock_test_case
    ):
        """Test: When dimensional validation raises Exception, main test still succeeds"""
        # Setup: Configure dimension columns
        mock_test_case.dimensionColumns = ["region", "category"]

        # Mock _run_dimensional_validation to raise a general exception
        validator._run_dimensional_validation = MagicMock(
            side_effect=RuntimeError("Something went wrong in dimensional validation")
        )

        # Execute
        result = validator.run_validation()

        # Verify: Main test should still succeed despite the exception
        assert isinstance(result, TestCaseResult)
        assert result.testCaseStatus == TestCaseStatus.Success
        assert result.dimensionResults is None  # No dimension results due to exception

        # Verify dimensional validation WAS attempted
        validator._run_dimensional_validation.assert_called_once()
|