mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-30 11:26:23 +00:00

* fix(dq): enable 'Column values to be in set' test case for boolean columns Add BOOLEAN to supportedDataTypes array in columnValuesToBeInSet.json to allow boolean column validation with predefined allowed values. This enables users to enforce strict true/false validation on boolean columns directly at the column level, resolving issue #22099. Co-authored-by: IceS2 <IceS2@users.noreply.github.com> * Add tests to the new feature * Add migrations and columnValuesToBeNotInSet --------- Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: IceS2 <IceS2@users.noreply.github.com>
528 lines
15 KiB
Python
528 lines
15 KiB
Python
# Copyright 2025 Collate
|
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Test Table and Column Tests' validate implementations.
|
|
|
|
Each test should validate the Success, Failure and Aborted statuses
|
|
"""
|
|
from datetime import date, datetime
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
|
from metadata.utils.importer import import_test_case_class
|
|
|
|
# Fixed, timezone-naive execution date (2021-07-03 at midnight); the test
# runner below passes its POSIX timestamp to each validator.
EXECUTION_DATE = datetime(2021, 7, 3)
|
|
|
|
|
|
# pylint: disable=line-too-long
|
|
@pytest.mark.parametrize(
    "test_case_name,test_case_type,test_type,expected",
    [
        # Each ``expected`` tuple is laid out as:
        # (result type, first result value, second result value, status,
        #  passed rows, failed rows, passed rows %, failed rows %)
        (
            "test_case_column_values_to_be_between_date",
            "columnValuesToBeBetween",
            "COLUMN",
            (
                TestCaseResult,
                "2021-07-01 00:00:00",
                "2021-07-01 23:59:59.999999",
                TestCaseStatus.Failed,
                0.0,
                30.0,
                0.0,
                100.0,
            ),
        ),
        (
            "test_case_column_values_to_be_between_datetime",
            "columnValuesToBeBetween",
            "COLUMN",
            (
                TestCaseResult,
                "2021-07-01 10:37:59",
                "2021-07-01 10:37:59",
                TestCaseStatus.Success,
                None,
                None,
                None,
                None,
            ),
        ),
        (
            "test_case_column_value_length_to_be_between",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "0", "8", TestCaseStatus.Failed, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_value_length_to_be_between_col_space",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "2", "3", TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_value_length_to_be_between_no_min",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_value_max_to_be_between",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, "31", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_max_to_be_between_no_min",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_mean_to_be_between",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.5", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_mean_to_be_between_no_max",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_median_to_be_between",
            "columnValueMedianToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_min_to_be_between",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_min_to_be_between_no_min",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_stddev_to_be_between",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, "0.25", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_stddev_to_be_between_no_min",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_in_set",
            "columnValuesToBeInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Success, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal_missing_values",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_values_not_in_set",
            "columnValuesToBeNotInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, 10.0, 20.0, 33.33, 66.67),
        ),
        (
            "test_case_column_sum_to_be_between",
            "columnValuesSumToBeBetween",
            "COLUMN",
            (TestCaseResult, "610", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_values_to_be_between",
            "columnValuesToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_values_to_be_not_null",
            "columnValuesToBeNotNull",
            "COLUMN",
            (TestCaseResult, "10", None, TestCaseStatus.Failed, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_values_to_be_unique",
            "columnValuesToBeUnique",
            "COLUMN",
            (TestCaseResult, "20", "0", TestCaseStatus.Failed, 0.0, 20.0, 0.0, 100.0),
        ),
        (
            "test_case_column_values_to_match_regex",
            "columnValuesToMatchRegex",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_values_to_not_match_regex",
            "columnValuesToNotMatchRegex",
            "COLUMN",
            (TestCaseResult, "0", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_table_column_count_to_be_between",
            "tableColumnCountToBeBetween",
            "TABLE",
            (TestCaseResult, "11", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_column_count_to_equal",
            "tableColumnCountToEqual",
            "TABLE",
            (TestCaseResult, "11", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_column_name_to_exist",
            "tableColumnNameToExist",
            "TABLE",
            (TestCaseResult, "1", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_to_match_set",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, "0", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_to_match_set_ordered",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query_success",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "0", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_row_count_to_be_between",
            "tableRowCountToBeBetween",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_row_count_to_be_equal",
            "tableRowCountToEqual",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_row_inserted_count_to_be_between",
            "tableRowInsertedCountToBeBetween",
            "TABLE",
            (TestCaseResult, "6", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query_with_threshold_success",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "10", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_unsafe_query_aborted",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Aborted, None, None, None, None),
        ),
        (
            "test_case_column_values_to_be_at_expected_location",
            "columnValuesToBeAtExpectedLocation",
            "COLUMN",
            (TestCaseResult, "30", "0", TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_in_set_boolean",
            "columnValuesToBeInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Success, 20.0, 0.0, 66.67, 0.0),
        ),
    ],
)
def test_suite_validation_database(
    test_case_name,
    test_case_type,
    test_type,
    expected,
    request,
    create_sqlite_table,
):
    """Run one parametrized validation and compare it with ``expected``.

    Args:
        test_case_name: name of the fixture that provides the test case
            definition; it is resolved through ``request.getfixturevalue``.
        test_case_type: validator name handed to ``import_test_case_class``
            (e.g. ``"columnValuesToBeBetween"``).
        test_type: ``"COLUMN"`` or ``"TABLE"``, also handed to
            ``import_test_case_class``.
        expected: 8-tuple of (result type, first result value, second result
            value, status, passed rows, failed rows, passed rows percentage,
            failed rows percentage).
        request: pytest's built-in ``request`` fixture.
        create_sqlite_table: fixture forwarded as the first positional
            argument of the validator (presumably the SQLite-backed runner;
            defined outside this module).
    """
    (
        result_type,
        first_value,
        second_value,
        expected_status,
        expected_passed_rows,
        expected_failed_rows,
        expected_passed_pct,
        expected_failed_pct,
    ) = expected
    test_case = request.getfixturevalue(test_case_name)

    def _run_validation():
        """Import the validator class, instantiate it and run the validation."""
        validator_class = import_test_case_class(
            test_type,
            "sqlalchemy",
            test_case_type,
        )
        validator = validator_class(
            create_sqlite_table,
            test_case=test_case,
            execution_date=EXECUTION_DATE.timestamp(),
        )
        return validator.run_validation()

    # The two date-based cases run with ``_run_results`` patched to return a
    # fixed value instead of whatever the validator would compute itself.
    patched_run_results = {
        "test_case_column_values_to_be_between_date": date(2021, 7, 1),
        "test_case_column_values_to_be_between_datetime": datetime(
            2021, 7, 1, 10, 37, 59
        ),
    }

    if test_case_name in patched_run_results:
        with patch(
            "metadata.data_quality.validations.column.sqlalchemy.columnValuesToBeBetween.ColumnValuesToBeBetweenValidator._run_results",
            return_value=patched_run_results[test_case_name],
        ):
            res = _run_validation()
    else:
        res = _run_validation()

    assert isinstance(res, result_type)

    # NOTE: every guard below is a truthiness check, so an expectation of
    # ``None`` *or* ``0.0`` means "do not check this field".
    for position, expected_value in enumerate((first_value, second_value)):
        if expected_value:
            assert res.testResultValue[position].value == expected_value

    for attribute, expected_rows in (
        ("passedRows", expected_passed_rows),
        ("failedRows", expected_failed_rows),
    ):
        if expected_rows:
            assert getattr(res, attribute) == expected_rows

    for attribute, expected_pct in (
        ("passedRowsPercentage", expected_passed_pct),
        ("failedRowsPercentage", expected_failed_pct),
    ):
        if expected_pct:
            # Percentages are compared after rounding to two decimals.
            assert round(getattr(res, attribute), 2) == expected_pct

    assert res.testCaseStatus == expected_status
|