mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-07 21:17:07 +00:00
* Added database filter in workflow * Removed association between profiler and data quality * fixed tests with removed association * Fixed sonar code smells and bugs
371 lines
13 KiB
Python
371 lines
13 KiB
Python
# Copyright 2021 Collate
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Test validations that need a session configured to run
|
|
"""
|
|
from datetime import datetime
|
|
from unittest import TestCase as UnitestTestCase
|
|
|
|
from sqlalchemy import TEXT, Column, Integer, String, create_engine
|
|
from sqlalchemy.orm import declarative_base
|
|
|
|
from metadata.generated.schema.entity.data.table import ColumnProfile, TableProfile
|
|
from metadata.generated.schema.entity.services.connections.database.sqliteConnection import (
|
|
SQLiteConnection,
|
|
SQLiteScheme,
|
|
)
|
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
|
from metadata.generated.schema.tests.column.columnValuesMissingCountToBeEqual import (
|
|
ColumnValuesMissingCount,
|
|
)
|
|
from metadata.generated.schema.tests.column.columnValuesToBeInSet import (
|
|
ColumnValuesToBeInSet,
|
|
)
|
|
from metadata.generated.schema.tests.column.columnValuesToBeNotInSet import (
|
|
ColumnValuesToBeNotInSet,
|
|
)
|
|
from metadata.generated.schema.tests.column.columnValuesToMatchRegex import (
|
|
ColumnValuesToMatchRegex,
|
|
)
|
|
from metadata.generated.schema.tests.column.columnValuesToNotMatchRegex import (
|
|
ColumnValuesToNotMatchRegex,
|
|
)
|
|
from metadata.generated.schema.tests.testCase import TestCase, TestCaseParameterValue
|
|
from metadata.orm_profiler.interfaces.sqa_profiler_interface import SQAProfilerInterface
|
|
from metadata.orm_profiler.validations.core import validation_enum_registry
|
|
|
|
EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
|
Base = declarative_base()
|
|
|
|
|
|
class User(Base):
|
|
__tablename__ = "users"
|
|
id = Column(Integer, primary_key=True)
|
|
name = Column(String(256))
|
|
fullname = Column(String(256))
|
|
nickname = Column(String(256))
|
|
comments = Column(TEXT)
|
|
age = Column(Integer)
|
|
|
|
|
|
class SessionValidation(UnitestTestCase):
|
|
"""
|
|
Run checks on different metrics
|
|
"""
|
|
|
|
sqlite_conn = SQLiteConnection(scheme=SQLiteScheme.sqlite_pysqlite)
|
|
sqa_profiler_interface = SQAProfilerInterface(sqlite_conn, table=User)
|
|
engine = sqa_profiler_interface.session.get_bind()
|
|
session = sqa_profiler_interface.session
|
|
|
|
@classmethod
|
|
def setUpClass(cls) -> None:
|
|
"""
|
|
Prepare Ingredients
|
|
"""
|
|
User.__table__.create(bind=cls.engine)
|
|
|
|
data = [
|
|
User(
|
|
name="John",
|
|
fullname="John Doe",
|
|
nickname="johnny b goode",
|
|
comments="no comments",
|
|
age=30,
|
|
),
|
|
User(
|
|
name="Jane",
|
|
fullname="Jone Doe",
|
|
nickname=None,
|
|
comments="maybe some comments",
|
|
age=31,
|
|
),
|
|
]
|
|
cls.session.add_all(data)
|
|
cls.session.commit()
|
|
|
|
def test_column_values_not_in_set(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
column_profile = ColumnProfile(name="name") # column name
|
|
|
|
res_ok = validation_enum_registry.registry["columnValuesToBeNotInSet"](
|
|
ColumnValuesToBeNotInSet(forbiddenValues=["random", "forbidden"]),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Success,
|
|
result="Found countInSet=0. It should be 0.",
|
|
)
|
|
|
|
res_ko = validation_enum_registry.registry["columnValuesToBeNotInSet"](
|
|
ColumnValuesToBeNotInSet(forbiddenValues=["John", "forbidden"]),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ko == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found countInSet=1. It should be 0.",
|
|
)
|
|
|
|
res_aborted = validation_enum_registry.registry["columnValuesToBeNotInSet"](
|
|
ColumnValuesToBeNotInSet(forbiddenValues=["John", "forbidden"]),
|
|
col_profile=ColumnProfile(name="random"),
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_aborted == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Aborted,
|
|
result=(
|
|
"Error computing ColumnValuesToBeNotInSet for users.random - Cannot find"
|
|
+ " the configured column random for ColumnValuesToBeNotInSet"
|
|
),
|
|
)
|
|
|
|
def test_column_values_to_match_regex(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
column_profile = ColumnProfile(name="name", valuesCount=2) # column name
|
|
|
|
res_ok = validation_enum_registry.registry["columnValuesToMatchRegex"](
|
|
ColumnValuesToMatchRegex(regex="J%"),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Success,
|
|
result="Found 2 value(s) matching regex pattern vs 2 value(s) in the column.",
|
|
)
|
|
|
|
res_ko = validation_enum_registry.registry["columnValuesToMatchRegex"](
|
|
ColumnValuesToMatchRegex(regex="Jo%"),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ko == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found 1 value(s) matching regex pattern vs 2 value(s) in the column.",
|
|
)
|
|
|
|
res_aborted = validation_enum_registry.registry["columnValuesToMatchRegex"](
|
|
ColumnValuesToMatchRegex(regex="J%"),
|
|
col_profile=ColumnProfile(name="name"),
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_aborted == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Aborted,
|
|
result=(
|
|
"We expect `valuesCount` to be informed for ColumnValuesToMatchRegex."
|
|
),
|
|
)
|
|
|
|
def test_column_values_missing_count_to_be_equal(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
column_profile = ColumnProfile(name="nickname", nullCount=1)
|
|
|
|
res_ok = validation_enum_registry.registry["columnValuesMissingCountToBeEqual"](
|
|
ColumnValuesMissingCount(missingCountValue=1),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Success,
|
|
result="Found missingCount=1.0. It should be 1.",
|
|
)
|
|
|
|
res_ok_2 = validation_enum_registry.registry[
|
|
"columnValuesMissingCountToBeEqual"
|
|
](
|
|
ColumnValuesMissingCount(
|
|
missingCountValue=2,
|
|
missingValueMatch=["johnny b goode"],
|
|
),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ok_2 == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Success,
|
|
result="Found missingCount=2.0. It should be 2.",
|
|
)
|
|
|
|
res_ko = validation_enum_registry.registry["columnValuesMissingCountToBeEqual"](
|
|
ColumnValuesMissingCount(
|
|
missingCountValue=0,
|
|
),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ko == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found missingCount=1.0. It should be 0.",
|
|
)
|
|
|
|
res_aborted = validation_enum_registry.registry[
|
|
"columnValuesMissingCountToBeEqual"
|
|
](
|
|
ColumnValuesMissingCount(
|
|
missingCountValue=0,
|
|
),
|
|
col_profile=ColumnProfile(name="nickname"),
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_aborted == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Aborted,
|
|
result=(
|
|
"We expect `nullCount` to be informed on the profiler for ColumnValuesMissingCount."
|
|
),
|
|
)
|
|
|
|
def test_column_values_to_be_in_set(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
column_profile = ColumnProfile(name="name") # column name
|
|
|
|
res_ok = validation_enum_registry.registry["columnValuesToBeInSet"](
|
|
ColumnValuesToBeInSet(allowedValues=["random", "forbidden"]),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found countInSet=0",
|
|
)
|
|
|
|
def test_column_values_to_not_match_regex(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
column_profile = ColumnProfile(name="name", valuesCount=2) # column name
|
|
|
|
res_ok = validation_enum_registry.registry["columnValuesToNotMatchRegex"](
|
|
ColumnValuesToNotMatchRegex(forbiddenRegex="J%"),
|
|
col_profile=column_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
runner=self.sqa_profiler_interface.runner,
|
|
table=User,
|
|
)
|
|
|
|
print(res_ok)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found 2 matching the forbidden regex pattern. Expected 0.",
|
|
)
|
|
|
|
def test_table_custom_sql_query(self):
|
|
"""
|
|
Check that the metric runs and the results are correctly validated
|
|
"""
|
|
table_profile = TableProfile(timestamp=EXECUTION_DATE.timestamp())
|
|
print(validation_enum_registry.registry["TableCustomSQLQuery"])
|
|
res_ok = (
|
|
validation_enum_registry.registry["TableCustomSQLQuery"](
|
|
TestCase(
|
|
name="my_test_case",
|
|
parameterValues=[
|
|
TestCaseParameterValue(
|
|
name="sqlExpression",
|
|
value="SELECT * FROM users WHERE age < 10",
|
|
)
|
|
],
|
|
),
|
|
test_definition=None,
|
|
table_profile=table_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
session=self.session,
|
|
table=User,
|
|
)
|
|
or []
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Success,
|
|
result="Found 0 row(s). Test query is expected to return 0 row.",
|
|
)
|
|
|
|
res_ok = (
|
|
validation_enum_registry.registry["TableCustomSQLQuery"](
|
|
TestCase(
|
|
name="my_test_case",
|
|
parameterValues=[
|
|
TestCaseParameterValue(
|
|
name="sqlExpression",
|
|
value="SELECT * FROM users WHERE LOWER(name) LIKE '%john%'",
|
|
)
|
|
],
|
|
),
|
|
test_definition=None,
|
|
table_profile=table_profile,
|
|
execution_date=EXECUTION_DATE,
|
|
session=self.session,
|
|
table=User,
|
|
)
|
|
or []
|
|
)
|
|
|
|
assert res_ok == TestCaseResult(
|
|
timestamp=EXECUTION_DATE.timestamp(),
|
|
testCaseStatus=TestCaseStatus.Failed,
|
|
result="Found 1 row(s). Test query is expected to return 0 row.",
|
|
)
|