2022-07-20 17:54:10 +02:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
Test SQA Interface
|
|
|
|
"""
|
|
|
|
|
2022-07-29 10:41:53 +02:00
|
|
|
import os
|
2024-06-07 04:36:17 +02:00
|
|
|
from datetime import datetime
|
2022-07-20 17:54:10 +02:00
|
|
|
from unittest import TestCase
|
2022-10-11 15:57:25 +02:00
|
|
|
from unittest.mock import patch
|
2022-08-19 10:52:08 +02:00
|
|
|
from uuid import uuid4
|
2022-07-20 17:54:10 +02:00
|
|
|
|
2022-07-29 10:41:53 +02:00
|
|
|
from sqlalchemy import TEXT, Column, Integer, String, inspect
|
2022-07-20 17:54:10 +02:00
|
|
|
from sqlalchemy.orm import declarative_base
|
|
|
|
from sqlalchemy.orm.session import Session
|
|
|
|
|
2022-08-22 09:01:24 -07:00
|
|
|
from metadata.generated.schema.api.data.createTableProfile import (
|
|
|
|
CreateTableProfileRequest,
|
|
|
|
)
|
2022-08-19 10:52:08 +02:00
|
|
|
from metadata.generated.schema.entity.data.table import Column as EntityColumn
|
|
|
|
from metadata.generated.schema.entity.data.table import (
|
|
|
|
ColumnName,
|
|
|
|
ColumnProfile,
|
|
|
|
DataType,
|
|
|
|
Table,
|
|
|
|
TableProfile,
|
|
|
|
)
|
2022-07-20 17:54:10 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
|
|
|
|
SQLiteConnection,
|
|
|
|
SQLiteScheme,
|
|
|
|
)
|
2024-06-05 21:18:37 +02:00
|
|
|
from metadata.generated.schema.type.basic import Timestamp
|
2023-11-17 17:51:39 +01:00
|
|
|
from metadata.profiler.api.models import ThreadPoolMetrics
|
2023-06-22 12:51:56 +05:30
|
|
|
from metadata.profiler.interface.sqlalchemy.profiler_interface import (
|
2023-04-04 17:16:44 +02:00
|
|
|
SQAProfilerInterface,
|
|
|
|
)
|
2023-03-01 08:20:38 +01:00
|
|
|
from metadata.profiler.metrics.core import (
|
2022-07-29 10:41:53 +02:00
|
|
|
ComposedMetric,
|
|
|
|
MetricTypes,
|
|
|
|
QueryMetric,
|
|
|
|
StaticMetric,
|
|
|
|
)
|
2023-03-01 08:20:38 +01:00
|
|
|
from metadata.profiler.metrics.static.row_count import RowCount
|
2023-04-04 17:16:44 +02:00
|
|
|
from metadata.profiler.processor.default import get_default_metrics
|
2022-07-20 17:54:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
class User(declarative_base()):
    """SQLAlchemy ORM model for the ``users`` table profiled by the tests below."""

    __tablename__ = "users"

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Free-text identity fields, each capped at 256 characters.
    name = Column(String(length=256))
    fullname = Column(String(length=256))
    nickname = Column(String(length=256))

    # Unbounded text column.
    comments = Column(TEXT)

    # Plain integer column.
    age = Column(Integer)
|
|
|
|
|
|
|
|
|
2022-08-25 10:01:28 +02:00
|
|
|
class SQAInterfaceTest(TestCase):
    """Check that ``SQAProfilerInterface`` can be built on a SQLite connection."""

    def setUp(self) -> None:
        """Create a new profiler interface bound to the ``User`` ORM model."""
        id_column = EntityColumn(
            name=ColumnName("id"),
            dataType=DataType.INT,
        )
        table_entity = Table(id=uuid4(), name="user", columns=[id_column])
        sqlite_conn = SQLiteConnection(scheme=SQLiteScheme.sqlite_pysqlite)

        # Positional arguments for the interface constructor; everything
        # except the connection and the table entity is left as None.
        interface_args = (
            sqlite_conn,
            None,
            table_entity,
            None,
            None,
            None,
            None,
            None,
        )

        # Patch the table-to-ORM conversion so the interface works on ``User``
        # directly instead of deriving a model from ``table_entity``.
        with patch.object(
            SQAProfilerInterface,
            "_convert_table_to_orm_object",
            return_value=User,
        ):
            self.sqa_profiler_interface = SQAProfilerInterface(*interface_args)

        self.table = User

    def tearDown(self) -> None:
        """Drop the sampler reference held by the interface after each test."""
        self.sqa_profiler_interface._sampler = None

    def test_init_interface(self):
        """The interface exposes a SQLAlchemy ``Session`` once constructed."""
        session = self.sqa_profiler_interface.session
        assert isinstance(session, Session)
|
2022-07-29 10:41:53 +02:00
|
|
|
|
|
|
|
|
2022-08-25 10:01:28 +02:00
|
|
|
class SQAInterfaceTestMultiThread(TestCase):
    """Exercise ``SQAProfilerInterface.get_all_metrics`` on a file-backed SQLite DB.

    The profiler interface is created once, when the class body is executed,
    against a ``test.db`` file located next to this module. ``setUpClass``
    creates and populates the ``users`` table; ``tearDownClass`` closes the
    session and removes the database file.
    """

    table_entity = Table(
        id=uuid4(),
        name="user",
        columns=[
            EntityColumn(
                name=ColumnName("id"),
                dataType=DataType.INT,
            )
        ],
    )
    db_path = os.path.join(os.path.dirname(__file__), "test.db")
    sqlite_conn = SQLiteConnection(
        scheme=SQLiteScheme.sqlite_pysqlite,
        # ``check_same_thread=False`` is appended to the database path so the
        # SQLite connection may be used from threads other than its creator.
        databaseMode=db_path + "?check_same_thread=False",
    )

    # Patch the table-to-ORM conversion so the interface works on ``User``
    # directly instead of deriving a model from ``table_entity``.
    with patch.object(
        SQAProfilerInterface, "_convert_table_to_orm_object", return_value=User
    ):
        # NOTE(review): the trailing ``5, 43200`` are presumably the thread
        # count and timeout in seconds -- confirm against
        # ``SQAProfilerInterface.__init__``.
        sqa_profiler_interface = SQAProfilerInterface(
            sqlite_conn, None, table_entity, None, None, None, None, None, 5, 43200
        )

    @classmethod
    def setUpClass(cls) -> None:
        """Create the ``users`` table, insert two rows and bucket the default metrics."""
        User.__table__.create(bind=cls.sqa_profiler_interface.session.get_bind())

        data = [
            User(name="John", fullname="John Doe", nickname="johnny b goode", age=30),
            User(name="Jane", fullname="Jone Doe", nickname=None, age=31),
        ]
        cls.sqa_profiler_interface.session.add_all(data)
        cls.sqa_profiler_interface.session.commit()

        cls.table = User
        cls.metrics = get_default_metrics(cls.table)
        cls.static_metrics = [
            metric for metric in cls.metrics if issubclass(metric, StaticMetric)
        ]
        cls.composed_metrics = [
            metric for metric in cls.metrics if issubclass(metric, ComposedMetric)
        ]
        cls.window_metrics = [
            metric
            for metric in cls.metrics
            if issubclass(metric, StaticMetric) and metric.is_window_metric()
        ]
        cls.query_metrics = [
            metric
            for metric in cls.metrics
            if issubclass(metric, QueryMetric) and metric.is_col_metric()
        ]

    def test_init_interface(self):
        """Test we can instantiate our interface object correctly"""
        assert isinstance(self.sqa_profiler_interface.session, Session)

    def test_get_all_metrics(self):
        """Compute table, static, query and window metrics and check the profile.

        One ``ThreadPoolMetrics`` is built for the table-level metrics, and for
        every column of ``User`` one for the static metrics, one per query
        metric and one for the window metrics. The combined list is handed to
        ``get_all_metrics`` and the result is validated by building a
        ``CreateTableProfileRequest`` from it.
        """
        table_metrics = [
            ThreadPoolMetrics(
                metrics=[
                    metric
                    for metric in self.metrics
                    if (not metric.is_col_metric() and not metric.is_system_metrics())
                ],
                metric_type=MetricTypes.Table,
                column=None,
                table=self.table,
            )
        ]
        column_metrics = []
        query_metrics = []
        window_metrics = []
        for col in inspect(User).c:
            column_metrics.append(
                ThreadPoolMetrics(
                    metrics=[
                        metric
                        for metric in self.static_metrics
                        if metric.is_col_metric() and not metric.is_window_metric()
                    ],
                    metric_type=MetricTypes.Static,
                    column=col,
                    table=self.table,
                )
            )
            # Query metrics are submitted one at a time (not as a list).
            for query_metric in self.query_metrics:
                query_metrics.append(
                    ThreadPoolMetrics(
                        metrics=query_metric,
                        metric_type=MetricTypes.Query,
                        column=col,
                        table=self.table,
                    )
                )
            window_metrics.append(
                ThreadPoolMetrics(
                    metrics=[
                        metric
                        for metric in self.window_metrics
                        if metric.is_window_metric()
                    ],
                    metric_type=MetricTypes.Window,
                    column=col,
                    table=self.table,
                )
            )

        all_metrics = [*table_metrics, *column_metrics, *query_metrics, *window_metrics]

        profile_results = self.sqa_profiler_interface.get_all_metrics(
            all_metrics,
        )

        # Only columns that actually produced results get a ColumnProfile.
        column_profile = [
            ColumnProfile(**profile_results["columns"].get(col.name))
            for col in inspect(User).c
            if profile_results["columns"].get(col.name)
        ]

        table_profile = TableProfile(
            columnCount=profile_results["table"].get("columnCount"),
            rowCount=profile_results["table"].get(RowCount.name()),
            # NOTE(review): this is epoch *seconds*; confirm the unit that
            # ``Timestamp`` is expected to carry.
            timestamp=Timestamp(int(datetime.now().timestamp())),
        )

        profile_request = CreateTableProfileRequest(
            tableProfile=table_profile, columnProfile=column_profile
        )

        assert profile_request.tableProfile.columnCount == 6
        assert profile_request.tableProfile.rowCount == 2

        name_column_profile = next(
            profile
            for profile in profile_request.columnProfile
            if profile.name == "name"
        )
        id_column_profile = next(
            profile for profile in profile_request.columnProfile if profile.name == "id"
        )
        assert name_column_profile.nullCount == 0
        assert id_column_profile.median == 1.0

    @classmethod
    def tearDownClass(cls) -> None:
        """Close the session, then delete the SQLite file created for this class."""
        try:
            # Release the session's connection before unlinking the file it
            # points at; an open handle can block the removal on some
            # platforms and otherwise leaks until interpreter exit.
            cls.sqa_profiler_interface.session.close()
        finally:
            # Remove the file even if closing the session raised.
            os.remove(cls.db_path)
        return super().tearDownClass()
|