Imri Paran b960b60965
Fix #16421: add tableDiff test case (#16554)
* feat: add tableDiff test case

This change introduces a "table diff" test case which
compares two tables and fails if they are not identical.
The comparison is based on a specific "key" (because the test only makes sense when performed on ordered collections).

1. Added the `tableDiff` test definition.
2. Implemented a "runtime" parameters feature which injects additional parameters for the test at runtime.
3. Integration tests (because of course).

This feature was not tested end-to-end yet because "array" data

* pydantic v2

* format

* format

* format and added data diff to setup.py

* format

* fixed param issue which has type ARRAY

* fixed runtime_parameter_setter

* moved models to parent directory

* handle errors in table diff

* fixed issue with edit test case

* format

* added more details to pytest skip

* format

* refactor: Improve createTestCaseParameters function in DataQualityUtils

* fixed unit test

* removed unused fixture

* removed validator.py

* fixed tests

* added validate kwarg to tests_mixin

* removed "postgres" data diff extra as they interfere with psycopg2-binary

* fixed tests

* pinned tenacity for tests

* reverted tenacity pinning

* added ui support for test diff

* fixed dq cypress and added edit flow

* organized the test case

* added dialect support

* fixed tests

* option style fix

* fixed calculation for passing/failing rows

* restrict the tableDiff test to limited services

* set where to None if blank string

* fixed where clause

* fixed tests for where clause

* use displayName in place of name in edit form

* added docs for RuntimeParameterSetter

* fixed cypress

---------

Co-authored-by: Shailesh Parmar <shailesh.parmar.webdev@gmail.com>
2024-06-20 16:54:12 +02:00

71 lines
2.5 KiB
Python

import pytest
from testcontainers.mysql import MySqlContainer
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.workflow import LogLevels
from metadata.ingestion.models.custom_pydantic import CustomSecretStr
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
from ..postgres.conftest import db_service as postgres_service
from ..postgres.conftest import ingest_metadata as ingest_postgres
from ..postgres.conftest import postgres_container, try_bind
# Re-export the names imported from the sibling postgres conftest so they are
# part of this module's declared public API.
# NOTE(review): presumably this keeps the imported fixtures visible to pytest
# in this package and stops linters flagging the imports as unused - confirm.
# `try_bind` is imported as well but is only used locally, so it is not listed.
__all__ = [
"ingest_postgres",
"postgres_service",
"postgres_container",
]
@pytest.fixture(scope="module")
def mysql_container():
    """Yield a MySQL 8 test container for the duration of the test module.

    The container is built from the ``mysql:8`` image and handed to
    ``try_bind`` together with ports 3306 and 3307; whatever that context
    manager produces is yielded to the tests.
    NOTE(review): presumably ``try_bind`` starts the container and attempts to
    map container port 3306 to host port 3307 - confirm against its
    definition in the postgres conftest.
    """
    mysql = MySqlContainer("mysql:8")
    with try_bind(mysql, 3306, 3307) as bound_container:
        yield bound_container
@pytest.fixture(scope="module")
def ingest_mysql_service(
    mysql_container: MySqlContainer, metadata: OpenMetadata, tmp_path_factory
):
    """Ingest the MySQL container's metadata and yield its database service.

    Builds a metadata-ingestion workflow configuration pointing at
    ``mysql_container``, runs it through ``MetadataWorkflow``, then looks up
    the resulting ``DatabaseService`` by name and yields it. On teardown the
    service is hard-deleted recursively.

    Args:
        mysql_container: Running MySQL test container to ingest from.
        metadata: OpenMetadata client used for the sink configuration, the
            service lookup and the final cleanup.
        tmp_path_factory: pytest's temp-dir factory, used here only to obtain
            a unique suffix for the service name.

    Yields:
        The ``DatabaseService`` entity created by the ingestion, with the
        container password set on its connection config.

    Raises:
        Whatever ``MetadataWorkflow.execute`` / ``raise_from_status`` raise
        when the ingestion fails; the workflow is still stopped in that case.
    """
    # `Path.name` is already the final path component, so no further
    # splitting is needed. pytest numbers the directories it creates, which
    # makes the suffix unique within the test session.
    service_name = "integration_test_mysql_" + tmp_path_factory.mktemp("mysql").name
    # Formatting (rather than `str + port`) works whether the exposed port is
    # returned as a str or as an int.
    host_port = f"localhost:{mysql_container.get_exposed_port(3306)}"

    workflow_config = {
        "source": {
            "type": "mysql",
            "serviceName": service_name,
            "serviceConnection": {
                "config": {
                    "type": "Mysql",
                    "username": mysql_container.username,
                    "authType": {
                        "password": mysql_container.password,
                    },
                    "hostPort": host_port,
                    "databaseSchema": mysql_container.dbname,
                }
            },
            "sourceConfig": {
                "config": {
                    "type": "DatabaseMetadata",
                },
            },
        },
        "sink": {"type": "metadata-rest", "config": {}},
        "workflowConfig": {
            "loggerLevel": LogLevels.DEBUG.value,
            "openMetadataServerConfig": metadata.config.dict(),
        },
    }

    metadata_ingestion = MetadataWorkflow.create(workflow_config)
    try:
        metadata_ingestion.execute()
        metadata_ingestion.raise_from_status()
    finally:
        # Always release the workflow, even when the ingestion failed.
        metadata_ingestion.stop()

    db_service: DatabaseService = metadata.get_by_name(DatabaseService, service_name)
    # NOTE(review): presumably the entity fetched from the server does not
    # carry a usable password, so the real one is put back for tests that
    # connect through this service - confirm.
    db_service.connection.config.authType.password = CustomSecretStr(
        mysql_container.password
    )
    yield db_service
    # Teardown: remove the service and everything ingested under it.
    metadata.delete(DatabaseService, db_service.id, recursive=True, hard_delete=True)