2022-07-20 17:54:10 +02:00
|
|
|
# Copyright 2021 Collate
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
Test SQA Interface
|
|
|
|
"""
|
|
|
|
|
2022-07-29 10:41:53 +02:00
|
|
|
import os
|
2022-08-22 09:01:24 -07:00
|
|
|
from datetime import datetime, timezone
|
2022-07-20 17:54:10 +02:00
|
|
|
from unittest import TestCase
|
2022-08-19 10:52:08 +02:00
|
|
|
from uuid import uuid4
|
2022-07-20 17:54:10 +02:00
|
|
|
|
|
|
|
from pytest import raises
|
2022-07-29 10:41:53 +02:00
|
|
|
from sqlalchemy import TEXT, Column, Integer, String, inspect
|
2022-07-20 17:54:10 +02:00
|
|
|
from sqlalchemy.orm import declarative_base
|
|
|
|
from sqlalchemy.orm.session import Session
|
|
|
|
|
2022-08-22 09:01:24 -07:00
|
|
|
from metadata.generated.schema.api.data.createTableProfile import (
|
|
|
|
CreateTableProfileRequest,
|
|
|
|
)
|
2022-08-19 10:52:08 +02:00
|
|
|
from metadata.generated.schema.entity.data.table import Column as EntityColumn
|
|
|
|
from metadata.generated.schema.entity.data.table import (
|
|
|
|
ColumnName,
|
|
|
|
ColumnProfile,
|
|
|
|
DataType,
|
|
|
|
Table,
|
|
|
|
TableProfile,
|
|
|
|
)
|
2022-07-20 17:54:10 +02:00
|
|
|
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
|
|
|
|
SQLiteConnection,
|
|
|
|
SQLiteScheme,
|
|
|
|
)
|
2022-08-25 10:01:28 +02:00
|
|
|
from metadata.interfaces.sqa_interface import SQAInterface
|
2022-07-29 10:41:53 +02:00
|
|
|
from metadata.orm_profiler.metrics.core import (
|
|
|
|
ComposedMetric,
|
|
|
|
MetricTypes,
|
|
|
|
QueryMetric,
|
|
|
|
StaticMetric,
|
|
|
|
)
|
|
|
|
from metadata.orm_profiler.metrics.static.row_count import RowCount
|
|
|
|
from metadata.orm_profiler.profiler.default import get_default_metrics
|
2022-07-20 17:54:10 +02:00
|
|
|
|
|
|
|
|
|
|
|
class User(declarative_base()):
    """ORM model for the ``users`` table that the interface tests profile."""

    __tablename__ = "users"

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Bounded-length string columns.
    name = Column(String(length=256))
    fullname = Column(String(length=256))
    nickname = Column(String(length=256))

    # Free-form text column.
    comments = Column(TEXT)

    # Plain integer column.
    age = Column(Integer)
|
|
|
|
|
|
|
|
|
2022-08-25 10:01:28 +02:00
|
|
|
class SQAInterfaceTest(TestCase):
    """Construction and attribute-protection checks for ``SQAInterface``."""

    def setUp(self) -> None:
        """Build a new interface around the ``User`` model before each test."""
        table_entity = Table(
            id=uuid4(),
            name="user",
            columns=[
                EntityColumn(
                    name=ColumnName(__root__="id"),
                    dataType=DataType.INT,
                )
            ],
        )
        # Only the scheme is set; every other connection option keeps its default.
        sqlite_conn = SQLiteConnection(
            scheme=SQLiteScheme.sqlite_pysqlite,
        )
        self.sqa_profiler_interface = SQAInterface(
            sqlite_conn, table=User, table_entity=table_entity
        )
        self.table = User

    def test_init_interface(self):
        """Test we can instantiate our interface object correctly"""
        # Identity checks: `is not None` cannot be fooled by a custom `__ne__`.
        assert self.sqa_profiler_interface._sampler is not None
        assert self.sqa_profiler_interface._runner is not None
        assert isinstance(self.sqa_profiler_interface.session, Session)

    def test_private_attributes(self):
        """Test that ``runner``, ``sampler`` and ``sample`` reject assignment"""
        # Each assignment needs its own `raises` context: the first exception
        # leaves the `with` block, so any statement after it in the same block
        # would never execute and would go unchecked.
        for attribute in ("runner", "sampler", "sample"):
            with raises(AttributeError):
                setattr(self.sqa_profiler_interface, attribute, None)

    def tearDown(self) -> None:
        """Drop the sampler reference held by the interface after each test."""
        self.sqa_profiler_interface._sampler = None
|
2022-07-29 10:41:53 +02:00
|
|
|
|
|
|
|
|
2022-08-25 10:01:28 +02:00
|
|
|
class SQAInterfaceTestMultiThread(TestCase):
    """Run the default metrics through ``SQAInterface.get_all_metrics``.

    The interface is created once at class level on top of a ``test.db``
    SQLite file located next to this module; the file is removed again in
    ``tearDownClass``.
    """

    table_entity = Table(
        id=uuid4(),
        name="user",
        columns=[
            EntityColumn(
                name=ColumnName(__root__="id"),
                dataType=DataType.INT,
            )
        ],
    )
    db_path = os.path.join(os.path.dirname(__file__), "test.db")
    # NOTE(review): `check_same_thread=False` presumably lets the SQLite
    # connection be used from several threads -- confirm against SQAInterface.
    sqlite_conn = SQLiteConnection(
        scheme=SQLiteScheme.sqlite_pysqlite,
        databaseMode=db_path + "?check_same_thread=False",
    )
    sqa_profiler_interface = SQAInterface(
        sqlite_conn, table=User, table_entity=table_entity
    )

    @classmethod
    def setUpClass(cls) -> None:
        """
        Prepare Ingredients
        """
        # Create the `users` table on the interface's bind and seed two rows.
        User.__table__.create(bind=cls.sqa_profiler_interface.session.get_bind())

        data = [
            User(name="John", fullname="John Doe", nickname="johnny b goode", age=30),
            User(name="Jane", fullname="Jone Doe", nickname=None, age=31),
        ]
        cls.sqa_profiler_interface.session.add_all(data)
        cls.sqa_profiler_interface.session.commit()

        # Split the default metrics by kind so the test can assemble the
        # tuples it hands to `get_all_metrics`.
        cls.table = User
        cls.metrics = get_default_metrics(cls.table)
        cls.static_metrics = [
            metric for metric in cls.metrics if issubclass(metric, StaticMetric)
        ]
        cls.composed_metrics = [
            metric for metric in cls.metrics if issubclass(metric, ComposedMetric)
        ]
        cls.window_metrics = [
            metric
            for metric in cls.metrics
            if issubclass(metric, StaticMetric) and metric.is_window_metric()
        ]
        cls.query_metrics = [
            metric
            for metric in cls.metrics
            if issubclass(metric, QueryMetric) and metric.is_col_metric()
        ]

    def test_init_interface(self):
        """Test we can instantiate our interface object correctly"""
        # Identity checks: `is not None` cannot be fooled by a custom `__ne__`.
        assert self.sqa_profiler_interface._sampler is not None
        assert self.sqa_profiler_interface._runner is not None
        assert isinstance(self.sqa_profiler_interface.session, Session)

    def test_get_all_metrics(self):
        """Test that table and column profiles can be built from the metrics"""
        # One tuple for the table-level metrics (no column attached).
        table_metrics = [
            (
                [metric for metric in self.metrics if not metric.is_col_metric()],
                MetricTypes.Table,
                None,
                self.table,
            )
        ]
        column_metrics = []
        query_metrics = []
        window_metrics = []
        for col in inspect(User).c:
            # Static, non-window column metrics travel together as one list.
            column_metrics.append(
                (
                    [
                        metric
                        for metric in self.static_metrics
                        if metric.is_col_metric() and not metric.is_window_metric()
                    ],
                    MetricTypes.Static,
                    col,
                    self.table,
                )
            )
            # Query and window metrics get one tuple per (metric, column) pair.
            for query_metric in self.query_metrics:
                query_metrics.append(
                    (
                        query_metric,
                        MetricTypes.Query,
                        col,
                        self.table,
                    )
                )
            for window_metric in self.window_metrics:
                window_metrics.append(
                    (
                        window_metric,
                        MetricTypes.Window,
                        col,
                        self.table,
                    )
                )

        all_metrics = [*table_metrics, *column_metrics, *query_metrics, *window_metrics]

        profile_results = self.sqa_profiler_interface.get_all_metrics(
            all_metrics,
        )

        # Columns without any (truthy) result are left out of the profile list.
        column_results = profile_results["columns"]
        column_profile = [
            ColumnProfile(**column_results.get(col.name))
            for col in inspect(User).c
            if column_results.get(col.name)
        ]

        table_profile = TableProfile(
            columnCount=profile_results["table"].get("columnCount"),
            rowCount=profile_results["table"].get(RowCount.name()),
            timestamp=datetime.now(tz=timezone.utc).timestamp(),
        )

        profile_request = CreateTableProfileRequest(
            tableProfile=table_profile, columnProfile=column_profile
        )

        assert profile_request.tableProfile.columnCount == 6
        assert profile_request.tableProfile.rowCount == 2

        # Pick the first profile for each column of interest.
        name_column_profile = next(
            profile
            for profile in profile_request.columnProfile
            if profile.name == "name"
        )
        id_column_profile = next(
            profile for profile in profile_request.columnProfile if profile.name == "id"
        )
        assert name_column_profile.nullCount == 0
        assert id_column_profile.median == 1.5

    @classmethod
    def tearDownClass(cls) -> None:
        """Close the session, then delete the SQLite file created for the class."""
        try:
            # Close the session before the database file is deleted; the
            # file is removed even if closing fails.
            cls.sqa_profiler_interface.session.close()
        finally:
            os.remove(cls.db_path)
        return super().tearDownClass()
|