Teddy 58699063db
MINOR -- Fix DQ Partition Issue (#18641)
* fix: renamed `random_sample` to `get_dataset` and changed dunder method access for SQA Table object

* fix: removed handle_partition decorator

* fix: fixed DQ partition issue + moved to `tablesample` method

* style: ran python linting

* style: fix python format check issues

* feat: added postgres tablesample

* style: ran python linting

* fix: sampling delta

* fix: merge conflicts

* fix: resolved conflicts

* style: ran python linting

* fix: patch orm call in test case

* fix: mock build_table_orm call in tests

* fix: test case failures and errors

* fix: removed unused import

* fix: patch typo

* fix: trino table schema retrieval

* fix: remove tuple context manager for 3.8 test support
2024-11-27 08:50:54 +01:00

89 lines
3.1 KiB
Python

# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tests for data quality test case results: full, sampled and partitioned test suite runs
"""
from typing import List
import pytest
from metadata.generated.schema.tests.basic import TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCase
class TestDataQuality:
    """Assertions on the test case results exposed through ``metadata``.

    Each test requests a ``run_*_test_suite_workflow`` fixture that is defined
    outside this file and never referenced in the test body; it is presumably
    requested only so the matching workflow has run before the results are
    read -- confirm against the fixture definitions.
    """

    @staticmethod
    def _get_test_case(metadata, test_case_name: str) -> TestCase:
        """Return the test case whose ``name.root`` equals ``test_case_name``.

        Args:
            metadata: object exposing ``list_entities``; the ``entities``
                attribute of its return value is scanned for the match.
            test_case_name: name of the test case to look up.

        Returns:
            The first matching ``TestCase``.

        Raises:
            AssertionError: if no listed test case has that name.
        """
        test_cases: List[TestCase] = metadata.list_entities(
            TestCase, fields=["*"], skip_on_failure=True
        ).entities
        test_case = next(
            (t for t in test_cases if t.name.root == test_case_name), None
        )
        assert test_case is not None, f"test case {test_case_name!r} not found"
        return test_case

    @pytest.mark.parametrize(
        "test_case_name,expected_status",
        [
            ("first_name_includes_john", TestCaseStatus.Success),
            ("first_name_is_john", TestCaseStatus.Failed),
        ],
    )
    def test_data_quality(
        self, run_test_suite_workflow, metadata, test_case_name, expected_status
    ):
        """Check the result status of each named test case."""
        test_case = self._get_test_case(metadata, test_case_name)
        assert test_case.testCaseResult.testCaseStatus == expected_status

    @pytest.mark.parametrize(
        "test_case_name,failed_rows",
        [
            ("first_name_includes_john", None),
            ("first_name_is_john", 1),
        ],
    )
    def test_data_quality_with_sample(
        self, run_sampled_test_suite_workflow, metadata, test_case_name, failed_rows
    ):
        """Check the failed row count, within a tolerance of one row.

        The status is not asserted here and the count is compared with
        ``abs=1``. A ``failed_rows`` of ``None`` means "no expectation on the
        count"; only the existence of the test case is checked then.
        """
        test_case = self._get_test_case(metadata, test_case_name)
        # Explicit None check so that an expected count of 0 is still verified.
        if failed_rows is not None:
            assert test_case.testCaseResult.failedRows == pytest.approx(
                failed_rows, abs=1
            )

    @pytest.mark.parametrize(
        "test_case_name,expected_status,failed_rows",
        [
            ("first_name_includes_john", TestCaseStatus.Success, None),
            ("first_name_is_john", TestCaseStatus.Failed, 1),
        ],
    )
    def test_data_quality_with_partition(
        self,
        run_partitioned_test_suite_workflow,
        metadata,
        test_case_name,
        expected_status,
        failed_rows,
    ):
        """Check the result status and, when given, the exact failed row count.

        A ``failed_rows`` of ``None`` means "no expectation on the count".
        """
        test_case = self._get_test_case(metadata, test_case_name)
        assert test_case.testCaseResult.testCaseStatus == expected_status
        # Explicit None check so that an expected count of 0 is still verified.
        if failed_rows is not None:
            assert test_case.testCaseResult.failedRows == failed_rows