2023-01-31 15:57:51 +01:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
Test Table and Column Tests' validate implementations.
|
|
|
|
|
|
|
|
Each test should validate the Success, Failure and Aborted statuses
|
|
|
|
"""
|
|
|
|
|
2023-02-22 09:42:34 +01:00
|
|
|
from datetime import datetime, timedelta
|
2023-01-31 15:57:51 +01:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
from pandas import DataFrame
|
|
|
|
|
2023-04-04 17:16:44 +02:00
|
|
|
from metadata.data_quality.validations.validator import Validator
|
2023-01-31 15:57:51 +01:00
|
|
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
2023-02-22 09:42:34 +01:00
|
|
|
from metadata.utils.importer import import_test_case_class
|
2023-01-31 15:57:51 +01:00
|
|
|
|
|
|
|
# Fixed, naive reference date; the test runner hands it to every test handler
# as a POSIX timestamp (`EXECUTION_DATE.timestamp()`).
EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")

# Read the clock once so that every `inserted_date` below is offset from the
# same instant (exactly 1, 2 and 3 days back) instead of from three slightly
# different readings.
_REFERENCE_NOW = datetime.today()

# Three template rows repeated ten times -> 30 rows in total.
# Field order matches the `columns` list given to the DataFrame below.
# The third row deliberately leaves `nickname` and `age` empty (None).
DL_DATA = (
    [
        "1",
        "John",
        "Jo",
        "John Doe",
        "johnny b goode",
        30,
        _REFERENCE_NOW - timedelta(days=1),
    ],
    [
        "2",
        "Jane",
        "Ja",
        "Jone Doe",
        "Johnny d",
        31,
        _REFERENCE_NOW - timedelta(days=2),
    ],
    [
        "3",
        "John",
        "Joh",
        "John Doe",
        None,
        None,
        _REFERENCE_NOW - timedelta(days=3),
    ],
) * 10


# In-memory frame that the parametrized test passes to each test handler.
# Note that the "first name" column intentionally contains a space.
DATALAKE_DATA_FRAME = DataFrame(
    DL_DATA,
    columns=[
        "id",
        "name",
        "first name",
        "fullname",
        "nickname",
        "age",
        "inserted_date",
    ],
)
|
|
|
|
|
|
|
|
# pylint: disable=line-too-long
|
|
|
|
@pytest.mark.parametrize(
    "test_case_name,test_case_type,test_type,expected",
    [
        (
            "test_case_column_value_length_to_be_between",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "8", "14", TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_length_to_be_between_col_space",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, "2", "3", TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_length_to_be_between_no_min",
            "columnValueLengthsToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_max_to_be_between",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, "31.0", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_max_to_be_between_no_min",
            "columnValueMaxToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_mean_to_be_between",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.5", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_mean_to_be_between_no_max",
            "columnValueMeanToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_median_to_be_between",
            "columnValueMedianToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.5", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_min_to_be_between",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.0", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_min_to_be_between_no_min",
            "columnValueMinToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_stddev_to_be_between",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, "0.512989176042577", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_value_stddev_to_be_between_no_min",
            "columnValueStdDevToBeBetween",
            "COLUMN",
            (TestCaseResult, None, None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_value_in_set",
            "columnValuesToBeInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "10", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_values_missing_count_to_be_equal_missing_values",
            "columnValuesMissingCount",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_values_not_in_set",
            "columnValuesToBeNotInSet",
            "COLUMN",
            (TestCaseResult, "20", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_sum_to_be_between",
            "columnValuesSumToBeBetween",
            "COLUMN",
            (TestCaseResult, "610.0", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_values_to_be_between",
            "columnValuesToBeBetween",
            "COLUMN",
            (TestCaseResult, "30.0", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_values_to_be_not_null",
            "columnValuesToBeNotNull",
            "COLUMN",
            (TestCaseResult, "10", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_values_to_be_unique",
            "columnValuesToBeUnique",
            "COLUMN",
            (TestCaseResult, "30", "2", TestCaseStatus.Failed),
        ),
        (
            "test_case_column_values_to_match_regex",
            "columnValuesToMatchRegex",
            "COLUMN",
            (TestCaseResult, "30", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_values_to_not_match_regex",
            "columnValuesToNotMatchRegex",
            "COLUMN",
            (TestCaseResult, "0", None, TestCaseStatus.Success),
        ),
        (
            "test_case_table_column_count_to_be_between",
            "tableColumnCountToBeBetween",
            "TABLE",
            (TestCaseResult, "7", None, TestCaseStatus.Success),
        ),
        (
            "test_case_table_column_count_to_equal",
            "tableColumnCountToEqual",
            "TABLE",
            (TestCaseResult, "7", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_table_column_name_to_exist",
            "tableColumnNameToExist",
            "TABLE",
            (TestCaseResult, "1", None, TestCaseStatus.Success),
        ),
        (
            "test_case_column_to_match_set",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, "0", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_column_to_match_set_ordered",
            "tableColumnToMatchSet",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Failed),
        ),
        (
            "test_case_table_custom_sql_query_failed_dl",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Failed),
        ),
        (
            "test_case_table_custom_sql_query_success_dl",
            "tableCustomSQLQuery",
            "TABLE",
            (TestCaseResult, None, None, TestCaseStatus.Success),
        ),
        (
            "test_case_table_row_count_to_be_between",
            "tableRowCountToBeBetween",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Success),
        ),
        (
            "test_case_table_row_count_to_be_equal",
            "tableRowCountToEqual",
            "TABLE",
            (TestCaseResult, "30", None, TestCaseStatus.Failed),
        ),
        (
            "test_case_table_row_inserted_count_to_be_between",
            "tableRowInsertedCountToBeBetween",
            "TABLE",
            (TestCaseResult, "10", None, TestCaseStatus.Success),
        ),
    ],
)
def test_suite_validation_datalake(
    test_case_name,
    test_case_type,
    test_type,
    expected,
    request,
):
    """Generic test runner for test validations.

    Resolves the test-case fixture by name, builds the matching "pandas"
    test handler around ``DATALAKE_DATA_FRAME``, runs it through ``Validator``
    and compares the outcome with ``expected``.

    Args:
        test_case_name: name of the fixture providing the test case; it is
            looked up with ``request.getfixturevalue``.
        test_case_type: test definition name (e.g. ``"columnValuesToBeBetween"``),
            passed as the last argument to ``import_test_case_class``.
        test_type: ``"COLUMN"`` or ``"TABLE"``, passed as the first argument to
            ``import_test_case_class``.
        expected: 4-tuple ``(result_type, first_value, second_value, status)``.
            A value of ``None`` means the corresponding entry of
            ``testResultValue`` is not checked.
        request: pytest's ``request`` fixture.
    """
    test_case = request.getfixturevalue(test_case_name)
    type_, val_1, val_2, status = expected

    test_handler_obj = import_test_case_class(
        test_type,
        "pandas",
        test_case_type,
    )

    test_handler = test_handler_obj(
        DATALAKE_DATA_FRAME,
        test_case=test_case,
        execution_date=EXECUTION_DATE.timestamp(),
    )

    validator = Validator(test_handler)
    res = validator.validate()

    assert isinstance(res, type_)
    # Only `None` means "no expectation"; an explicit identity check keeps a
    # falsy expected value (e.g. "" or 0) from silently skipping the assertion.
    if val_1 is not None:
        assert res.testResultValue[0].value == val_1
    if val_2 is not None:
        assert res.testResultValue[1].value == val_2
    assert res.testCaseStatus == status
|