mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-18 04:05:42 +00:00

* Update test data for `tests.integration.trino`. This is to create tables with complex data types, using raw SQL because creating the tables with pandas did not produce the right types for the structs. * Update tests to reproduce the issue. Also included the new tables in the other tests to make sure complex data types do not break anything else. Reference: [issue 16983](https://github.com/open-metadata/OpenMetadata/issues/16983) * Added `TypeDecorator`s to handle `trino.types.NamedRowTuple`. This is because pydantic couldn't figure out how to create Python objects when receiving `NamedRowTuple`s, which broke the sampling process. This makes sure the data we receive from the Trino interface is compatible with pydantic.
64 lines
1.9 KiB
Python
64 lines
1.9 KiB
Python
from copy import deepcopy
|
|
|
|
import pytest
|
|
|
|
from metadata.generated.schema.entity.data.table import Table
|
|
from metadata.generated.schema.metadataIngestion.databaseServiceAutoClassificationPipeline import (
|
|
DatabaseServiceAutoClassificationPipeline,
|
|
)
|
|
from metadata.ingestion.lineage.sql_lineage import search_cache
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
from metadata.workflow.classification import AutoClassificationWorkflow
|
|
from metadata.workflow.metadata import MetadataWorkflow
|
|
|
|
|
|
@pytest.fixture(scope="module")
def sampling_only_classifier_config(
    db_service,
    sink_config,
    workflow_config,
    classifier_config,
):
    """Return a copy of ``classifier_config`` with auto classification disabled.

    The incoming ``classifier_config`` is deep-copied, so the fixture it came
    from is left untouched. ``db_service``, ``sink_config`` and
    ``workflow_config`` are requested as fixtures but are not read here.
    """
    sampling_config = deepcopy(classifier_config)
    source_settings = sampling_config["source"]["sourceConfig"]["config"]
    source_settings["enableAutoClassification"] = False
    return sampling_config
|
|
|
|
|
|
@pytest.fixture(scope="module")
def run_classifier(
    patch_passwords_for_db_services,
    run_workflow,
    ingestion_config,
    sampling_only_classifier_config,
    create_test_data,
    request,
):
    """Run metadata ingestion, then the classification workflow, once per module.

    The lineage search cache is cleared before anything runs. The workflows
    are executed in order through the ``run_workflow`` fixture: first
    ``MetadataWorkflow`` with ``ingestion_config``, then
    ``AutoClassificationWorkflow`` with ``sampling_only_classifier_config``.

    ``patch_passwords_for_db_services``, ``create_test_data`` and ``request``
    are requested as fixtures but are not read in the body.

    Returns:
        The ``ingestion_config`` that was passed in.
    """
    search_cache.clear()

    # Order matters: the classification run follows the metadata ingestion.
    workflow_runs = (
        (MetadataWorkflow, ingestion_config),
        (AutoClassificationWorkflow, sampling_only_classifier_config),
    )
    for workflow_cls, workflow_cfg in workflow_runs:
        run_workflow(workflow_cls, workflow_cfg)

    return ingestion_config
|
|
|
|
|
|
@pytest.mark.parametrize(
    "table_name",
    [
        "{database_service}.minio.my_schema.table",
        "{database_service}.minio.my_schema.titanic",
        "{database_service}.minio.my_schema.iris",
        "{database_service}.minio.my_schema.userdata",
        "{database_service}.minio.my_schema.empty",
        "{database_service}.minio.my_schema.complex_and_simple",
        "{database_service}.minio.my_schema.only_complex",
    ],
)
def test_auto_classification_workflow(
    run_classifier,
    metadata: OpenMetadata,
    table_name: str,
    db_service: DatabaseServiceAutoClassificationPipeline,
):
    """Check that sample data can be fetched for each parametrized table.

    ``table_name`` is a template whose ``{database_service}`` placeholder is
    filled with the fully qualified name of ``db_service``. The resulting name
    is used to look the table up, and the test passes when
    ``metadata.get_sample_data`` returns something other than ``None``.
    """
    # NOTE(review): the annotation on ``db_service`` names a pipeline config
    # type, yet the object is used as something exposing
    # ``fullyQualifiedName`` -- confirm the intended type.
    service_fqn = db_service.fullyQualifiedName.root
    table_fqn = table_name.format(database_service=service_fqn)
    table = metadata.get_by_name(Table, table_fqn)

    sample_data = metadata.get_sample_data(table)
    assert sample_data is not None
|