mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-06 16:47:29 +00:00
108 lines
3.7 KiB
Python
108 lines
3.7 KiB
Python
# Copyright 2025 Collate
|
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
test data quality
|
|
"""
|
|
from typing import List
|
|
|
|
import pytest
|
|
|
|
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
|
|
IngestionPipeline,
|
|
PipelineState,
|
|
)
|
|
from metadata.generated.schema.tests.basic import TestCaseStatus
|
|
from metadata.generated.schema.tests.testCase import TestCase
|
|
|
|
|
|
class TestDataQuality:
    """Assertions on the test case results stored after a test suite run.

    Each test requests a ``run_*_test_suite_workflow`` fixture without using
    its value in the body.
    NOTE(review): those fixtures are defined outside this file; presumably
    they execute the corresponding workflow before the assertions run --
    confirm in the accompanying conftest.
    """

    @staticmethod
    def _get_test_case(metadata, test_case_name: str) -> TestCase:
        """Return the test case named ``test_case_name``, failing if absent.

        Args:
            metadata: client exposing ``list_entities``.
            test_case_name: value compared against ``TestCase.name.root``.

        Returns:
            The first listed test case whose name matches.
        """
        test_cases: List[TestCase] = metadata.list_entities(
            TestCase, fields=["*"], skip_on_failure=True
        ).entities
        test_case = next(
            (t for t in test_cases if t.name.root == test_case_name), None
        )
        assert test_case is not None, f"Test case '{test_case_name}' not found"
        return test_case

    @pytest.mark.parametrize(
        "test_case_name,expected_status",
        [
            ("first_name_includes_john", TestCaseStatus.Success),
            ("first_name_is_john", TestCaseStatus.Failed),
        ],
    )
    def test_data_quality(
        self,
        run_test_suite_workflow,
        metadata,
        test_case_name,
        expected_status,
        ingestion_fqn,
    ):
        """Check the test case status and the state of the ingestion pipeline."""
        test_case = self._get_test_case(metadata, test_case_name)
        assert (
            test_case.testCaseResult is not None
        ), f"Test case '{test_case_name}' has no result"
        assert test_case.testCaseResult.testCaseStatus == expected_status

        # Check the ingestion pipeline is properly created
        ingestion_pipeline: IngestionPipeline = metadata.get_by_name(
            entity=IngestionPipeline, fqn=ingestion_fqn, fields=["pipelineStatuses"]
        )
        assert ingestion_pipeline
        assert ingestion_pipeline.pipelineStatuses
        assert (
            ingestion_pipeline.pipelineStatuses.pipelineState == PipelineState.success
        )

    @pytest.mark.parametrize(
        "test_case_name,failed_rows",
        [
            ("first_name_includes_john", None),
            ("first_name_is_john", 1),
        ],
    )
    def test_data_quality_with_sample(
        self, run_sampled_test_suite_workflow, metadata, test_case_name, failed_rows
    ):
        """Check the failed row count, within +/-1, when one is expected."""
        test_case = self._get_test_case(metadata, test_case_name)
        # ``None`` means "no expectation"; an explicit identity check keeps an
        # expected count of 0 from silently skipping the assertion.
        if failed_rows is not None:
            assert (
                test_case.testCaseResult is not None
            ), f"Test case '{test_case_name}' has no result"
            # Absolute tolerance of 1 row -- presumably because the sampled
            # run does not yield an exact count (TODO confirm).
            assert test_case.testCaseResult.failedRows == pytest.approx(
                failed_rows, abs=1
            )

    @pytest.mark.parametrize(
        "test_case_name,expected_status,failed_rows",
        [
            ("first_name_includes_john", TestCaseStatus.Success, None),
            ("first_name_is_john", TestCaseStatus.Failed, 1),
        ],
    )
    def test_data_quality_with_partition(
        self,
        run_partitioned_test_suite_workflow,
        metadata,
        test_case_name,
        expected_status,
        failed_rows,
    ):
        """Check the status and, when one is expected, the exact failed row count."""
        test_case = self._get_test_case(metadata, test_case_name)
        assert (
            test_case.testCaseResult is not None
        ), f"Test case '{test_case_name}' has no result"
        assert test_case.testCaseResult.testCaseStatus == expected_status
        # ``None`` means "no expectation"; see test_data_quality_with_sample.
        if failed_rows is not None:
            assert test_case.testCaseResult.failedRows == failed_rows
|