mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-07-13 20:18:24 +00:00

* pydantic v2 * pydanticv2 * fix parser * fix annotated * fix model dumping * mysql ingestion * clean root models * clean root models * bump airflow * bump airflow * bump airflow * optionals * optionals * optionals * jdk * airflow migrate * fab provider * fab provider * fab provider * some more fixes * fixing tests and imports * model_dump and model_validate * model_dump and model_validate * model_dump and model_validate * union * pylint * pylint * integration tests * fix CostAnalysisReportData * integration tests * tests * missing defaults * missing defaults
300 lines
11 KiB
Python
300 lines
11 KiB
Python
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Validate workflow e2e
|
|
"""
|
|
|
|
import os
|
|
import unittest
|
|
from datetime import datetime, timedelta
|
|
|
|
import sqlalchemy as sqa
|
|
from sqlalchemy.orm import Session, declarative_base
|
|
|
|
from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
|
|
from metadata.generated.schema.api.data.createDatabaseSchema import (
|
|
CreateDatabaseSchemaRequest,
|
|
)
|
|
from metadata.generated.schema.api.data.createTable import CreateTableRequest
|
|
from metadata.generated.schema.api.services.createDatabaseService import (
|
|
CreateDatabaseServiceRequest,
|
|
)
|
|
from metadata.generated.schema.entity.data.database import Database
|
|
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
|
|
from metadata.generated.schema.entity.data.table import Column, DataType
|
|
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
|
|
SQLiteConnection,
|
|
SQLiteScheme,
|
|
)
|
|
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
|
OpenMetadataConnection,
|
|
)
|
|
from metadata.generated.schema.entity.services.databaseService import (
|
|
DatabaseConnection,
|
|
DatabaseService,
|
|
DatabaseServiceType,
|
|
)
|
|
from metadata.generated.schema.tests.testCase import TestCase
|
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
from metadata.workflow.data_quality import TestSuiteWorkflow
|
|
|
|
test_suite_config = {
|
|
"source": {
|
|
"type": "custom-database",
|
|
"serviceName": "test_suite_service_test",
|
|
"sourceConfig": {
|
|
"config": {
|
|
"type": "TestSuite",
|
|
"entityFullyQualifiedName": "test_suite_service_test.test_suite_database.test_suite_database_schema.users",
|
|
}
|
|
},
|
|
},
|
|
"processor": {
|
|
"type": "orm-test-runner",
|
|
"config": {
|
|
"testCases": [
|
|
{
|
|
"name": "my_test_case",
|
|
"testDefinitionName": "tableColumnCountToBeBetween",
|
|
"parameterValues": [
|
|
{"name": "minColValue", "value": "1"},
|
|
{"name": "maxColValue", "value": "5"},
|
|
],
|
|
},
|
|
{
|
|
"name": "table_column_to_be_not_null",
|
|
"testDefinitionName": "columnValuesToBeNotNull",
|
|
"columnName": "id",
|
|
"computePassedFailedRowCount": True,
|
|
},
|
|
],
|
|
},
|
|
},
|
|
"sink": {"type": "metadata-rest", "config": {}},
|
|
"workflowConfig": {
|
|
"openMetadataServerConfig": {
|
|
"hostPort": "http://localhost:8585/api",
|
|
"authProvider": "openmetadata",
|
|
"securityConfig": {
|
|
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
|
|
},
|
|
}
|
|
},
|
|
}
|
|
|
|
Base = declarative_base()
|
|
|
|
|
|
class User(Base):
|
|
__tablename__ = "users"
|
|
id = sqa.Column(sqa.Integer, primary_key=True)
|
|
name = sqa.Column(sqa.String(256))
|
|
fullname = sqa.Column(sqa.String(256))
|
|
nickname = sqa.Column(sqa.String(256))
|
|
age = sqa.Column(sqa.Integer)
|
|
|
|
|
|
class EmptyUser(Base):
|
|
__tablename__ = "empty_users"
|
|
id = sqa.Column(sqa.Integer, primary_key=True)
|
|
name = sqa.Column(sqa.String(256))
|
|
fullname = sqa.Column(sqa.String(256))
|
|
nickname = sqa.Column(sqa.String(256))
|
|
age = sqa.Column(sqa.Integer)
|
|
|
|
|
|
class TestE2EWorkflow(unittest.TestCase):
|
|
"""e2e test for the workflow"""
|
|
|
|
metadata = OpenMetadata(
|
|
OpenMetadataConnection.parse_obj(
|
|
test_suite_config["workflowConfig"]["openMetadataServerConfig"]
|
|
)
|
|
)
|
|
|
|
db_path = os.path.join(
|
|
os.path.dirname(__file__), f"{os.path.splitext(__file__)[0]}.db"
|
|
)
|
|
sqlite_conn = DatabaseConnection(
|
|
config=SQLiteConnection(
|
|
scheme=SQLiteScheme.sqlite_pysqlite,
|
|
databaseMode=db_path + "?check_same_thread=False",
|
|
)
|
|
)
|
|
|
|
@classmethod
|
|
def setUpClass(cls):
|
|
"""set up class"""
|
|
service: DatabaseService = cls.metadata.create_or_update(
|
|
CreateDatabaseServiceRequest(
|
|
name="test_suite_service_test",
|
|
serviceType=DatabaseServiceType.SQLite,
|
|
connection=cls.sqlite_conn,
|
|
)
|
|
)
|
|
|
|
database: Database = cls.metadata.create_or_update(
|
|
CreateDatabaseRequest(
|
|
name="test_suite_database",
|
|
service=service.fullyQualifiedName,
|
|
)
|
|
)
|
|
|
|
database_schema: DatabaseSchema = cls.metadata.create_or_update(
|
|
CreateDatabaseSchemaRequest(
|
|
name="test_suite_database_schema",
|
|
database=database.fullyQualifiedName,
|
|
)
|
|
)
|
|
|
|
cls.metadata.create_or_update(
|
|
CreateTableRequest(
|
|
name="users",
|
|
columns=[
|
|
Column(name="id", dataType=DataType.INT),
|
|
Column(name="name", dataType=DataType.STRING),
|
|
Column(name="fullname", dataType=DataType.STRING),
|
|
Column(name="nickname", dataType=DataType.STRING),
|
|
Column(name="age", dataType=DataType.INT),
|
|
],
|
|
databaseSchema=database_schema.fullyQualifiedName,
|
|
)
|
|
)
|
|
cls.metadata.create_or_update(
|
|
CreateTableRequest(
|
|
name="empty_users",
|
|
columns=[
|
|
Column(name="id", dataType=DataType.INT),
|
|
Column(name="name", dataType=DataType.STRING),
|
|
Column(name="fullname", dataType=DataType.STRING),
|
|
Column(name="nickname", dataType=DataType.STRING),
|
|
Column(name="age", dataType=DataType.INT),
|
|
],
|
|
databaseSchema=database_schema.fullyQualifiedName,
|
|
)
|
|
)
|
|
|
|
engine = sqa.create_engine(f"sqlite:///{cls.sqlite_conn.config.databaseMode}")
|
|
session = Session(bind=engine)
|
|
|
|
User.__table__.create(bind=engine)
|
|
EmptyUser.__table__.create(bind=engine)
|
|
|
|
for _ in range(10):
|
|
data = [
|
|
User(
|
|
name="John",
|
|
fullname="John Doe",
|
|
nickname="johnny b goode",
|
|
age=30,
|
|
),
|
|
User(
|
|
name="Jane",
|
|
fullname="Jone Doe",
|
|
nickname="Johnny d",
|
|
age=31,
|
|
),
|
|
User(
|
|
name="John",
|
|
fullname="John Doe",
|
|
nickname=None,
|
|
age=None,
|
|
),
|
|
]
|
|
session.add_all(data)
|
|
session.commit()
|
|
|
|
@classmethod
|
|
def tearDownClass(cls) -> None:
|
|
"""
|
|
Clean up
|
|
"""
|
|
service_db_id = str(
|
|
cls.metadata.get_by_name(
|
|
entity=DatabaseService, fqn="test_suite_service_test"
|
|
).id.root
|
|
)
|
|
|
|
cls.metadata.delete(
|
|
entity=DatabaseService,
|
|
entity_id=service_db_id,
|
|
recursive=True,
|
|
hard_delete=True,
|
|
)
|
|
|
|
os.remove(cls.db_path)
|
|
return super().tearDownClass()
|
|
|
|
def test_e2e_cli_workflow(self):
|
|
"""test cli workflow e2e"""
|
|
parameters = [
|
|
{"table_name": "users", "status": "Success"},
|
|
{"table_name": "empty_users", "status": "Aborted"},
|
|
]
|
|
|
|
for param in parameters:
|
|
with self.subTest(param=param):
|
|
table_name = param["table_name"]
|
|
status = param["status"]
|
|
test_suite_config["source"]["sourceConfig"]["config"].update(
|
|
{
|
|
"entityFullyQualifiedName": f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
|
}
|
|
)
|
|
|
|
workflow = TestSuiteWorkflow.create(test_suite_config)
|
|
workflow.execute()
|
|
workflow.raise_from_status()
|
|
|
|
test_case_1 = self.metadata.get_by_name(
|
|
entity=TestCase,
|
|
fqn=f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}.my_test_case",
|
|
fields=["testDefinition", "testSuite"],
|
|
)
|
|
test_case_2 = self.metadata.get_by_name(
|
|
entity=TestCase,
|
|
fqn=f"test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}.id.table_column_to_be_not_null",
|
|
fields=["testDefinition", "testSuite"],
|
|
)
|
|
|
|
assert test_case_1
|
|
assert test_case_2
|
|
|
|
test_case_result_1 = self.metadata.client.get(
|
|
f"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
|
".my_test_case/testCaseResult",
|
|
data={
|
|
"startTs": int((datetime.now() - timedelta(days=3)).timestamp())
|
|
* 1000,
|
|
"endTs": int((datetime.now() + timedelta(days=3)).timestamp())
|
|
* 1000,
|
|
},
|
|
)
|
|
test_case_result_2 = self.metadata.client.get(
|
|
f"/dataQuality/testCases/test_suite_service_test.test_suite_database.test_suite_database_schema.{table_name}"
|
|
".id.table_column_to_be_not_null/testCaseResult",
|
|
data={
|
|
"startTs": int((datetime.now() - timedelta(days=3)).timestamp())
|
|
* 1000,
|
|
"endTs": int((datetime.now() + timedelta(days=3)).timestamp())
|
|
* 1000,
|
|
},
|
|
)
|
|
|
|
data_test_case_result_1: dict = test_case_result_1.get("data") # type: ignore
|
|
data_test_case_result_2: dict = test_case_result_2.get("data") # type: ignore
|
|
|
|
assert data_test_case_result_1
|
|
assert data_test_case_result_1[0]["testCaseStatus"] == "Success"
|
|
assert data_test_case_result_2
|
|
assert data_test_case_result_2[0]["testCaseStatus"] == status
|