Eugenio 42416a513e
Simplified API for validating DataFrames (#24009)
* Refactor previous tests for shared resources

* Add validation result models

This also includes a method for merging them, which is useful when running validation in batches.

* Add `DataFrameValidationEngine` for running tests

This also includes a registry for mapping test names to pandas test classes

* Implement the DataFrameValidator facade

This includes the logic to load tests from different sources (OpenMetadata or code) and pass them down to the engine.

It also includes tests for the integration with OpenMetadata

* Add examples for the API

* Apply comments
2025-11-04 09:52:43 +01:00

45 lines
1.5 KiB
Python

from uuid import uuid4
from metadata.generated.schema.tests.testCase import TestCase
from metadata.generated.schema.type.entityReference import EntityReference
from metadata.sdk.data_quality import BaseTest, ColumnTest
class MockTestCase(TestCase):
    """Stand-in ``TestCase`` subclass; adds no fields or behavior of its own."""
def create_mock_test_case(test_definition: BaseTest) -> MockTestCase:
    """Build a synthetic ``MockTestCase`` from an SDK test definition.

    The result is attached to a ``dataframe_validation`` table entity link and
    a ``dataframe_validation`` test suite reference. Every ID on the test case
    and its references is a freshly generated UUID.

    Args:
        test_definition: SDK test to convert. When it is a ``ColumnTest``, the
            entity link targets its ``column_name`` rather than the table.

    Returns:
        Synthetic TestCase for DataFrame validation
    """
    # Column-level tests link to the column; everything else links to the table.
    if isinstance(test_definition, ColumnTest):
        link = (
            f"<#E::table::dataframe_validation::columns::{test_definition.column_name}>"
        )
    else:
        link = "<#E::table::dataframe_validation>"

    case_name = test_definition.name
    definition_name = test_definition.test_definition_name

    definition_ref = EntityReference(  # pyright: ignore[reportCallIssue]
        id=uuid4(),
        name=definition_name,
        fullyQualifiedName=definition_name,
        type="testDefinition",
    )
    suite_ref = EntityReference(  # pyright: ignore[reportCallIssue]
        id=uuid4(),
        name="dataframe_validation",
        type="testSuite",
    )

    return MockTestCase(  # pyright: ignore[reportCallIssue]
        id=uuid4(),
        name=case_name,
        fullyQualifiedName=case_name,
        displayName=test_definition.display_name,
        description=test_definition.description,
        testDefinition=definition_ref,
        entityLink=link,
        parameterValues=test_definition.parameters,
        testSuite=suite_ref,
        computePassedFailedRowCount=True,
    )