OpenMetadata/ingestion/tests/unit/test_suite/test_validations_databases.py
Teddy 3dc642989c
Fixes #7729 - Add logic to compute passed/failed rows (#14472)
* feat: add test case resolution task workflow

* chore: add migration for test case resolution feature

* fix: removed required field for object compatibility in older migrations

* fix: minor testCaseResolution status logic

* chore: revert migration for test case incident

* chore: update migration file

* style: renamed variables

* feat: added logic to compute failed/passed rows

* feat: add support for row level computation in schema

* chore: add test definition migration

* feat: add logic to explicitly compute row level failure

* chore: clean up code

* style: fix java

* style: fix python format

* fix: unhide API for incident manager

* style: fix java styling
2023-12-27 13:38:51 +01:00

502 lines
15 KiB
Python

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test Table and Column Tests' validate implementations.
Each test should validate the Success, Failure and Aborted statuses
"""
from datetime import date, datetime
from unittest.mock import patch
import pytest
from metadata.data_quality.validations.validator import Validator
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
from metadata.utils.importer import import_test_case_class
# Fixed, timezone-naive reference date (2021-07-03 at midnight); its
# timestamp is passed to the test handlers as their execution date.
EXECUTION_DATE = datetime(2021, 7, 3)
# pylint: disable=line-too-long
@pytest.mark.parametrize(
    "test_case_name,test_case_type,test_type,expected",
    [
        (
            "test_case_column_values_to_be_between_date",
            "columnValuesToBeBetween",
            "COLUMN",
            (
                TestCaseResult,
                "2021-07-01 00:00:00",
                "2021-07-01 23:59:59.999999",
                TestCaseStatus.Failed,
                0.0,
                30.0,
                0.0,
                100.0,
            ),
        ),
        (
            "test_case_column_values_to_be_between_datetime",
            "columnValuesToBeBetween",
            "COLUMN",
            (
                TestCaseResult,
                "2021-07-01 10:37:59",
                "2021-07-01 10:37:59",
                TestCaseStatus.Success,
                None,
                None,
                None,
                None,
            ),
        ),
        (
            "test_case_column_value_length_to_be_between",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "8", "14", TestCaseStatus.Failed, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_value_length_to_be_between_col_space",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "2", "3", TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_value_length_to_be_between_no_min",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_value_max_to_be_between",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, "31", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_max_to_be_between_no_min",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_mean_to_be_between",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.5", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_mean_to_be_between_no_max",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_median_to_be_between",
            "columnValueMedianToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_min_to_be_between",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_min_to_be_between_no_min",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_stddev_to_be_between",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, "0.25", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_value_stddev_to_be_between_no_min",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_value_in_set",
            "columnValuesToBeInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Success, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "10", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal_missing_values",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_values_not_in_set",
            "columnValuesToBeNotInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, 10.0, 20.0, 33.33, 66.67),
        ),
        (
            "test_case_column_sum_to_be_between",
            "columnValuesSumToBeBetween",
            "COLUMN",
            (TestCaseResult, "610", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_values_to_be_between",
            "columnValuesToBeBetween",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_values_to_be_not_null",
            "columnValuesToBeNotNull",
            "COLUMN",
            (TestCaseResult, "10", None, TestCaseStatus.Failed, 20.0, 10.0, 66.67, 33.33),
        ),
        (
            "test_case_column_values_to_be_unique",
            "columnValuesToBeUnique",
            "COLUMN",
            (TestCaseResult, "20", "0", TestCaseStatus.Failed, 0.0, 20.0, 0.0, 100.0),
        ),
        (
            "test_case_column_values_to_match_regex",
            "columnValuesToMatchRegex",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_column_values_to_not_match_regex",
            "columnValuesToNotMatchRegex",
            "COLUMN",
            (TestCaseResult, "0", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0),
        ),
        (
            "test_case_table_column_count_to_be_between",
            "tableColumnCountToBeBetween",
            "TABLE",
            (TestCaseResult, "7", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_column_count_to_equal",
            "tableColumnCountToEqual",
            "TABLE",
            (TestCaseResult, "7", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_column_name_to_exist",
            "tableColumnNameToExist",
            "TABLE",
            (TestCaseResult, "1", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_column_to_match_set",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, "0", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_column_to_match_set_ordered",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query_success",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "0", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_row_count_to_be_between",
            "tableRowCountToBeBetween",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_row_count_to_be_equal",
            "tableRowCountToEqual",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
        ),
        (
            "test_case_table_row_inserted_count_to_be_between",
            "tableRowInsertedCountToBeBetween",
            "TABLE",
            (TestCaseResult, "6", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_query_with_threshold_success",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, "10", None, TestCaseStatus.Success, None, None, None, None),
        ),
        (
            "test_case_table_custom_sql_unsafe_query_aborted",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Aborted, None, None, None, None),
        ),
    ],
)
def test_suite_validation_database(
    test_case_name,
    test_case_type,
    test_type,
    expected,
    request,
    create_sqlite_table,
):
    """Generic test runner for test validations.

    Args:
        test_case_name: name of the pytest fixture that provides the test case;
            resolved lazily through ``request.getfixturevalue``.
        test_case_type: test definition name handed to ``import_test_case_class``
            to locate the validator class.
        test_type: ``"COLUMN"`` or ``"TABLE"``; also handed to
            ``import_test_case_class``.
        expected: 8-tuple of ``(result type, first result value, second result
            value, status, passed rows, failed rows, passed rows percentage,
            failed rows percentage)``. A ``None`` entry means "do not check
            this field"; every other value, including ``0.0``, is asserted.
        request: pytest's built-in ``request`` fixture.
        create_sqlite_table: fixture value passed as the first positional
            argument of the validator (defined outside this file; presumably
            the SQLite-backed table/runner -- confirm against conftest).
    """
    test_case = request.getfixturevalue(test_case_name)
    (
        type_,
        val_1,
        val_2,
        status,
        passed_rows,
        failed_rows,
        passed_percentage,
        failed_percentage,
    ) = expected

    # For these fixtures `_run_results` is patched so the validator receives a
    # `date` / `datetime` object instead of whatever the query would return.
    patched_run_results = {
        "test_case_column_values_to_be_between_date": date(2021, 7, 1),
        "test_case_column_values_to_be_between_datetime": datetime(
            2021, 7, 1, 10, 37, 59
        ),
    }

    def run_validation():
        """Build the validator for the current parameters and run it once."""
        test_handler_obj = import_test_case_class(
            test_type,
            "sqlalchemy",
            test_case_type,
        )
        test_handler = test_handler_obj(
            create_sqlite_table,
            test_case=test_case,
            execution_date=EXECUTION_DATE.timestamp(),
        )
        return Validator(test_handler).validate()

    if test_case_name in patched_run_results:
        with patch(
            "metadata.data_quality.validations.column.sqlalchemy.columnValuesToBeBetween.ColumnValuesToBeBetweenValidator._run_results",
            return_value=patched_run_results[test_case_name],
        ):
            res = run_validation()
    else:
        res = run_validation()

    assert isinstance(res, type_)
    # Compare against None explicitly: a plain truthiness check would silently
    # skip every expectation whose value is 0.0.
    if val_1 is not None:
        assert res.testResultValue[0].value == val_1
    if val_2 is not None:
        assert res.testResultValue[1].value == val_2
    if passed_rows is not None:
        assert res.passedRows == passed_rows
    if failed_rows is not None:
        assert res.failedRows == failed_rows
    if passed_percentage is not None:
        assert round(res.passedRowsPercentage, 2) == passed_percentage
    if failed_percentage is not None:
        assert round(res.failedRowsPercentage, 2) == failed_percentage
    assert res.testCaseStatus == status