mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-11-22 21:51:53 +00:00
Fix #3084 - Implement missing tests
This commit is contained in:
parent
bddb16e859
commit
e96ac838ff
34
.github/workflows/deploy-docs.yml
vendored
34
.github/workflows/deploy-docs.yml
vendored
@ -1,34 +0,0 @@
|
|||||||
# Copyright 2021 Collate
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
name: Deploy Docs
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- 0.6.0
|
|
||||||
paths:
|
|
||||||
- 'docs/**'
|
|
||||||
- '.github/workflows/deploy-docs.yml'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-and-deploy:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Checkout 🛎️
|
|
||||||
uses: actions/checkout@v2.3.1
|
|
||||||
|
|
||||||
- name: Deploy 🚀
|
|
||||||
uses: JamesIves/github-pages-deploy-action@4.1.6
|
|
||||||
with:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
BRANCH: main
|
|
||||||
FOLDER: docs
|
|
||||||
target-folder: docs
|
|
||||||
@ -8,9 +8,7 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
"values": {
|
"values": {
|
||||||
"description": "An Array of values.",
|
"description": "An Array of values.",
|
||||||
"items": {
|
"type": "array"
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["values"],
|
"required": ["values"],
|
||||||
|
|||||||
@ -7,5 +7,5 @@ Provides metadata version information.
|
|||||||
|
|
||||||
from incremental import Version
|
from incremental import Version
|
||||||
|
|
||||||
__version__ = Version("metadata", 0, 9, 0, dev=17)
|
__version__ = Version("metadata", 0, 9, 0, dev=18)
|
||||||
__all__ = ["__version__"]
|
__all__ = ["__version__"]
|
||||||
|
|||||||
@ -23,7 +23,9 @@ from metadata.orm_profiler.metrics.composed.ilike_ratio import ILikeRatio
|
|||||||
from metadata.orm_profiler.metrics.composed.like_ratio import LikeRatio
|
from metadata.orm_profiler.metrics.composed.like_ratio import LikeRatio
|
||||||
from metadata.orm_profiler.metrics.composed.null_ratio import NullRatio
|
from metadata.orm_profiler.metrics.composed.null_ratio import NullRatio
|
||||||
from metadata.orm_profiler.metrics.composed.unique_ratio import UniqueRatio
|
from metadata.orm_profiler.metrics.composed.unique_ratio import UniqueRatio
|
||||||
|
from metadata.orm_profiler.metrics.static.column_count import ColumnCount
|
||||||
from metadata.orm_profiler.metrics.static.count import Count
|
from metadata.orm_profiler.metrics.static.count import Count
|
||||||
|
from metadata.orm_profiler.metrics.static.count_in_set import CountInSet
|
||||||
from metadata.orm_profiler.metrics.static.histogram import Histogram
|
from metadata.orm_profiler.metrics.static.histogram import Histogram
|
||||||
from metadata.orm_profiler.metrics.static.ilike_count import ILikeCount
|
from metadata.orm_profiler.metrics.static.ilike_count import ILikeCount
|
||||||
from metadata.orm_profiler.metrics.static.like_count import LikeCount
|
from metadata.orm_profiler.metrics.static.like_count import LikeCount
|
||||||
@ -50,6 +52,8 @@ class Metrics(MetricRegistry):
|
|||||||
# Static Metrics
|
# Static Metrics
|
||||||
MEAN = Mean
|
MEAN = Mean
|
||||||
COUNT = Count
|
COUNT = Count
|
||||||
|
COUNT_IN_SET = CountInSet
|
||||||
|
COLUMN_COUNT = ColumnCount
|
||||||
HISTOGRAM = Histogram
|
HISTOGRAM = Histogram
|
||||||
ILIKE_COUNT = ILikeCount
|
ILIKE_COUNT = ILikeCount
|
||||||
LIKE_COUNT = LikeCount
|
LIKE_COUNT = LikeCount
|
||||||
|
|||||||
@ -0,0 +1,47 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Table Column Count Metric definition
|
||||||
|
"""
|
||||||
|
from sqlalchemy import func, inspect, literal
|
||||||
|
|
||||||
|
from metadata.orm_profiler.metrics.core import StaticMetric, _label
|
||||||
|
|
||||||
|
|
||||||
|
class ColumnCount(StaticMetric):
    """
    COLUMN_COUNT Metric

    Table-level metric that reports how many columns the
    table stored in `self.table` has.
    """

    @classmethod
    def name(cls):
        """
        Name of the metric
        """
        return "columnCount"

    @classmethod
    def is_col_metric(cls) -> bool:
        """
        This metric is computed for the table, not for a column
        """
        return False

    def metric_type(self):
        """
        Python type of the metric result
        """
        return int

    @_label
    def fn(self):
        """
        Return a SQL literal holding the number of columns found
        when inspecting `self.table`.

        :raises AttributeError: if the `table` attribute has not been set
        """
        if hasattr(self, "table"):
            columns = inspect(self.table).c
            return literal(len(columns))

        raise AttributeError(
            "Column Count requires a table to be set: add_props(table=...)(Metrics.COLUMN_COUNT)"
        )
|
||||||
@ -0,0 +1,52 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
CountInSet Metric definition
|
||||||
|
"""
|
||||||
|
|
||||||
|
from sqlalchemy import case, func
|
||||||
|
|
||||||
|
from metadata.orm_profiler.metrics.core import StaticMetric, _label
|
||||||
|
from metadata.orm_profiler.utils import logger
|
||||||
|
|
||||||
|
logger = logger()
|
||||||
|
|
||||||
|
|
||||||
|
class CountInSet(StaticMetric):
    """
    COUNT_IN_SET Metric

    Given a column, return the count of values in a given set.
    The candidate values are read from the `values` attribute.
    """

    @classmethod
    def name(cls):
        """
        Name of the metric
        """
        return "countInSet"

    def metric_type(self):
        """
        Python type of the metric result
        """
        return int

    @_label
    def fn(self):
        """
        Build the expression SUM(CASE WHEN col IN (values) THEN 1 ELSE 0 END).

        :raises AttributeError: if the `values` attribute has not been set
        :return: the SQLAlchemy expression, or None if it could not be built
        """
        if not hasattr(self, "values"):
            raise AttributeError(
                "CountInSet requires a set of values to be validate: add_props(values=...)(Metrics.COUNT_IN_SET)"
            )

        try:
            # Deduplicate the candidates before building the IN clause
            set_values = set(self.values)
            return func.sum(case([(self.col.in_(set_values), 1)], else_=0))

        except Exception as err:  # pylint: disable=broad-except
            # Best effort: report the problem through the logger only
            # (no stray print to stdout) and return no expression.
            logger.error(f"Error trying to run countInSet for {self.col} - {err}")
            return None
|
||||||
@ -36,6 +36,6 @@ class ILikeCount(StaticMetric):
|
|||||||
def fn(self):
|
def fn(self):
|
||||||
if not hasattr(self, "expression"):
|
if not hasattr(self, "expression"):
|
||||||
raise AttributeError(
|
raise AttributeError(
|
||||||
"ILike Count requires an expression to be set: Metrics.ILIKE_COUNT(col, expression=...)"
|
"ILike Count requires an expression to be set: add_props(expression=...)(Metrics.ILIKE_COUNT)"
|
||||||
)
|
)
|
||||||
return func.count(self.col.ilike(self.expression))
|
return func.count(self.col.ilike(self.expression))
|
||||||
|
|||||||
@ -36,6 +36,6 @@ class LikeCount(StaticMetric):
|
|||||||
def fn(self):
|
def fn(self):
|
||||||
if not hasattr(self, "expression"):
|
if not hasattr(self, "expression"):
|
||||||
raise AttributeError(
|
raise AttributeError(
|
||||||
"Like Count requires an expression to be set: Metrics.LIKE_COUNT(col, expression=...)"
|
"Like Count requires an expression to be set: add_props(expression=...)(Metrics.LIKE_COUNT)"
|
||||||
)
|
)
|
||||||
return func.count(self.col.like(self.expression))
|
return func.count(self.col.like(self.expression))
|
||||||
|
|||||||
@ -66,7 +66,7 @@ class Mean(StaticMetric):
|
|||||||
if is_concatenable(self.col.type):
|
if is_concatenable(self.col.type):
|
||||||
return ConcatAvgFn(self.col)
|
return ConcatAvgFn(self.col)
|
||||||
|
|
||||||
logger.warning(
|
logger.debug(
|
||||||
f"Don't know how to process type {self.col.type} when computing AVG"
|
f"Don't know how to process type {self.col.type} when computing MEAN"
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|||||||
@ -33,6 +33,7 @@ def _(element, compiler, **kw):
|
|||||||
return "CONCAT(%s)" % compiler.process(element.clauses, **kw)
|
return "CONCAT(%s)" % compiler.process(element.clauses, **kw)
|
||||||
|
|
||||||
|
|
||||||
|
@compiles(ConcatFn, DatabaseServiceType.Redshift.value.lower())
|
||||||
@compiles(ConcatFn, DatabaseServiceType.SQLite.value.lower())
|
@compiles(ConcatFn, DatabaseServiceType.SQLite.value.lower())
|
||||||
def _(element, compiler, **kw):
|
def _(element, compiler, **kw):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -173,12 +173,17 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
return table.fullyQualifiedName + col + test_type
|
return table.fullyQualifiedName + col + test_type
|
||||||
|
|
||||||
def run_table_test(
|
def run_table_test(
|
||||||
self, table: Table, test_case: TableTestCase, profiler_results: TableProfile
|
self,
|
||||||
|
table: Table,
|
||||||
|
orm_table,
|
||||||
|
test_case: TableTestCase,
|
||||||
|
profiler_results: TableProfile,
|
||||||
) -> Optional[TestCaseResult]:
|
) -> Optional[TestCaseResult]:
|
||||||
"""
|
"""
|
||||||
Run & log the table test against the TableProfile.
|
Run & log the table test against the TableProfile.
|
||||||
|
|
||||||
:param table: Table Entity being processed
|
:param table: Table Entity being processed
|
||||||
|
:param orm_table: Declarative Meta
|
||||||
:param test_case: Table Test Case to run
|
:param test_case: Table Test Case to run
|
||||||
:param profiler_results: Table profiler with informed metrics
|
:param profiler_results: Table profiler with informed metrics
|
||||||
:return: TestCaseResult
|
:return: TestCaseResult
|
||||||
@ -196,6 +201,8 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
test_case.config,
|
test_case.config,
|
||||||
table_profile=profiler_results,
|
table_profile=profiler_results,
|
||||||
execution_date=self.execution_date,
|
execution_date=self.execution_date,
|
||||||
|
session=self.session,
|
||||||
|
table=orm_table,
|
||||||
)
|
)
|
||||||
self.log_test_result(name=test_name, result=test_case_result)
|
self.log_test_result(name=test_name, result=test_case_result)
|
||||||
return test_case_result
|
return test_case_result
|
||||||
@ -203,6 +210,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
def run_column_test(
|
def run_column_test(
|
||||||
self,
|
self,
|
||||||
table: Table,
|
table: Table,
|
||||||
|
orm_table,
|
||||||
column: str,
|
column: str,
|
||||||
test_case: ColumnTestCase,
|
test_case: ColumnTestCase,
|
||||||
profiler_results: TableProfile,
|
profiler_results: TableProfile,
|
||||||
@ -211,6 +219,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
Run & log the column test against the ColumnProfile
|
Run & log the column test against the ColumnProfile
|
||||||
|
|
||||||
:param table: Table Entity being processed
|
:param table: Table Entity being processed
|
||||||
|
:param orm_table: Declarative Meta
|
||||||
:param column: Column being tested
|
:param column: Column being tested
|
||||||
:param test_case: Column Test Case to run
|
:param test_case: Column Test Case to run
|
||||||
:param profiler_results: Table profiler with informed metrics
|
:param profiler_results: Table profiler with informed metrics
|
||||||
@ -248,20 +257,26 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
|
|
||||||
test_case_result: TestCaseResult = validate(
|
test_case_result: TestCaseResult = validate(
|
||||||
test_case.config,
|
test_case.config,
|
||||||
col_profiler_res,
|
col_profile=col_profiler_res,
|
||||||
execution_date=self.execution_date,
|
execution_date=self.execution_date,
|
||||||
|
session=self.session,
|
||||||
|
table=orm_table,
|
||||||
)
|
)
|
||||||
self.log_test_result(name=test_name, result=test_case_result)
|
self.log_test_result(name=test_name, result=test_case_result)
|
||||||
return test_case_result
|
return test_case_result
|
||||||
|
|
||||||
def validate_config_tests(
|
def validate_config_tests(
|
||||||
self, table: Table, profiler_results: TableProfile
|
self, table: Table, orm_table, profiler_results: TableProfile
|
||||||
) -> Optional[TestDef]:
|
) -> Optional[TestDef]:
|
||||||
"""
|
"""
|
||||||
Here we take care of new incoming tests in the workflow
|
Here we take care of new incoming tests in the workflow
|
||||||
definition. Run them and prepare the new TestDef
|
definition. Run them and prepare the new TestDef
|
||||||
of the record, that will be sent to the sink to
|
of the record, that will be sent to the sink to
|
||||||
update the Table Entity.
|
update the Table Entity.
|
||||||
|
|
||||||
|
:param table: OpenMetadata Table Entity being processed
|
||||||
|
:param orm_table: Declarative Meta
|
||||||
|
:param profiler_results: TableProfile with computed metrics
|
||||||
"""
|
"""
|
||||||
|
|
||||||
logger.info(f"Checking validations for {table.fullyQualifiedName}...")
|
logger.info(f"Checking validations for {table.fullyQualifiedName}...")
|
||||||
@ -285,6 +300,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
for table_test in my_record_tests.table_tests:
|
for table_test in my_record_tests.table_tests:
|
||||||
test_case_result = self.run_table_test(
|
test_case_result = self.run_table_test(
|
||||||
table=table,
|
table=table,
|
||||||
|
orm_table=orm_table,
|
||||||
test_case=table_test.testCase,
|
test_case=table_test.testCase,
|
||||||
profiler_results=profiler_results,
|
profiler_results=profiler_results,
|
||||||
)
|
)
|
||||||
@ -294,6 +310,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
for column_test in my_record_tests.column_tests:
|
for column_test in my_record_tests.column_tests:
|
||||||
test_case_result = self.run_column_test(
|
test_case_result = self.run_column_test(
|
||||||
table=table,
|
table=table,
|
||||||
|
orm_table=orm_table,
|
||||||
column=column_test.columnName,
|
column=column_test.columnName,
|
||||||
test_case=column_test.testCase,
|
test_case=column_test.testCase,
|
||||||
profiler_results=profiler_results,
|
profiler_results=profiler_results,
|
||||||
@ -306,6 +323,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
def validate_entity_tests(
|
def validate_entity_tests(
|
||||||
self,
|
self,
|
||||||
table: Table,
|
table: Table,
|
||||||
|
orm_table,
|
||||||
profiler_results: TableProfile,
|
profiler_results: TableProfile,
|
||||||
config_tests: Optional[TestDef],
|
config_tests: Optional[TestDef],
|
||||||
) -> Optional[TestDef]:
|
) -> Optional[TestDef]:
|
||||||
@ -319,6 +337,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
and trust the workflow input.
|
and trust the workflow input.
|
||||||
|
|
||||||
:param table: OpenMetadata Table Entity being processed
|
:param table: OpenMetadata Table Entity being processed
|
||||||
|
:param orm_table: Declarative Meta
|
||||||
:param profiler_results: TableProfile with computed metrics
|
:param profiler_results: TableProfile with computed metrics
|
||||||
:param config_tests: Results of running the configuration tests
|
:param config_tests: Results of running the configuration tests
|
||||||
"""
|
"""
|
||||||
@ -356,6 +375,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
for table_test in table_tests:
|
for table_test in table_tests:
|
||||||
test_case_result = self.run_table_test(
|
test_case_result = self.run_table_test(
|
||||||
table=table,
|
table=table,
|
||||||
|
orm_table=orm_table,
|
||||||
test_case=table_test.testCase,
|
test_case=table_test.testCase,
|
||||||
profiler_results=profiler_results,
|
profiler_results=profiler_results,
|
||||||
)
|
)
|
||||||
@ -382,6 +402,7 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
if column_test:
|
if column_test:
|
||||||
test_case_result = self.run_column_test(
|
test_case_result = self.run_column_test(
|
||||||
table=table,
|
table=table,
|
||||||
|
orm_table=orm_table,
|
||||||
column=column_test.columnName,
|
column=column_test.columnName,
|
||||||
test_case=column_test.testCase,
|
test_case=column_test.testCase,
|
||||||
profiler_results=profiler_results,
|
profiler_results=profiler_results,
|
||||||
@ -414,10 +435,12 @@ class OrmProfilerProcessor(Processor[Table]):
|
|||||||
# First, check if we have any tests directly configured in the workflow
|
# First, check if we have any tests directly configured in the workflow
|
||||||
config_tests = None
|
config_tests = None
|
||||||
if self.config.test_suite:
|
if self.config.test_suite:
|
||||||
config_tests = self.validate_config_tests(record, entity_profile)
|
config_tests = self.validate_config_tests(record, orm_table, entity_profile)
|
||||||
|
|
||||||
# Then, Check if the entity has any tests
|
# Then, Check if the entity has any tests
|
||||||
record_tests = self.validate_entity_tests(record, entity_profile, config_tests)
|
record_tests = self.validate_entity_tests(
|
||||||
|
record, orm_table, entity_profile, config_tests
|
||||||
|
)
|
||||||
|
|
||||||
res = ProfilerResponse(
|
res = ProfilerResponse(
|
||||||
table=record,
|
table=record,
|
||||||
|
|||||||
@ -218,7 +218,7 @@ class Profiler(Generic[MetricType]):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
for metric in self.get_col_metrics(self.query_metrics):
|
for metric in self.get_col_metrics(self.query_metrics):
|
||||||
|
try:
|
||||||
metric_query = metric(col).query(session=self.session)
|
metric_query = metric(col).query(session=self.session)
|
||||||
|
|
||||||
# We might not compute some metrics based on the column type.
|
# We might not compute some metrics based on the column type.
|
||||||
@ -235,6 +235,12 @@ class Profiler(Generic[MetricType]):
|
|||||||
data = {k: [dic[k] for dic in query_res] for k in dict(query_res[0])}
|
data = {k: [dic[k] for dic in query_res] for k in dict(query_res[0])}
|
||||||
self._column_results[col.name].update({metric.name(): data})
|
self._column_results[col.name].update({metric.name(): data})
|
||||||
|
|
||||||
|
except Exception as err: # pylint: disable=broad-except
|
||||||
|
logger.error(
|
||||||
|
f"Exception encountered computing {metric.name()} for {self.table.__tablename__}.{col.name} - {err}"
|
||||||
|
)
|
||||||
|
self.session.rollback()
|
||||||
|
|
||||||
def post_col_run(self, col: Column):
|
def post_col_run(self, col: Column):
|
||||||
"""
|
"""
|
||||||
Run this after the metrics have been computed
|
Run this after the metrics have been computed
|
||||||
@ -299,8 +305,9 @@ class Profiler(Generic[MetricType]):
|
|||||||
self.execute_column(col)
|
self.execute_column(col)
|
||||||
except Exception as exc: # pylint: disable=broad-except
|
except Exception as exc: # pylint: disable=broad-except
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Error trying to compute profile for {self.table}.{col.name} - {exc}"
|
f"Error trying to compute profile for {self.table.__tablename__}.{col.name} - {exc}"
|
||||||
)
|
)
|
||||||
|
self.session.rollback()
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@ -343,3 +350,7 @@ class Profiler(Generic[MetricType]):
|
|||||||
except ValidationError as err:
|
except ValidationError as err:
|
||||||
logger.error(f"Cannot transform profiler results to TableProfile {err}")
|
logger.error(f"Cannot transform profiler results to TableProfile {err}")
|
||||||
raise err
|
raise err
|
||||||
|
|
||||||
|
@property
|
||||||
|
def column_results(self):
|
||||||
|
return self._column_results
|
||||||
|
|||||||
@ -16,6 +16,7 @@ from typing import List, Optional
|
|||||||
|
|
||||||
from sqlalchemy.orm.session import Session
|
from sqlalchemy.orm.session import Session
|
||||||
|
|
||||||
|
from metadata.orm_profiler.metrics.core import add_props
|
||||||
from metadata.orm_profiler.metrics.registry import Metrics
|
from metadata.orm_profiler.metrics.registry import Metrics
|
||||||
from metadata.orm_profiler.profiles.core import Profiler
|
from metadata.orm_profiler.profiles.core import Profiler
|
||||||
|
|
||||||
@ -36,6 +37,7 @@ class DefaultProfiler(Profiler):
|
|||||||
_metrics = [
|
_metrics = [
|
||||||
# Table Metrics
|
# Table Metrics
|
||||||
Metrics.ROW_COUNT.value,
|
Metrics.ROW_COUNT.value,
|
||||||
|
add_props(table=table)(Metrics.COLUMN_COUNT.value),
|
||||||
# Column Metrics
|
# Column Metrics
|
||||||
Metrics.MEAN.value,
|
Metrics.MEAN.value,
|
||||||
Metrics.COUNT.value,
|
Metrics.COUNT.value,
|
||||||
|
|||||||
@ -0,0 +1,87 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
ColumnValuesToBeNotInSet validation implementation
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from sqlalchemy import inspect
|
||||||
|
from sqlalchemy.orm import DeclarativeMeta, Session
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import ColumnProfile
|
||||||
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
||||||
|
from metadata.generated.schema.tests.column.columnValuesToBeNotInSet import (
|
||||||
|
ColumnValuesToBeNotInSet,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.metrics.core import add_props
|
||||||
|
from metadata.orm_profiler.metrics.registry import Metrics
|
||||||
|
from metadata.orm_profiler.profiles.core import Profiler
|
||||||
|
from metadata.orm_profiler.utils import logger
|
||||||
|
|
||||||
|
logger = logger()
|
||||||
|
|
||||||
|
|
||||||
|
def column_values_not_in_set(
    test_case: ColumnValuesToBeNotInSet,
    col_profile: ColumnProfile,
    execution_date: datetime,
    session: Optional[Session] = None,
    table: Optional[DeclarativeMeta] = None,
) -> TestCaseResult:
    """
    Validate that a column holds no value from a forbidden set.

    The countInSet metric is not read from the column profile: it is
    computed here by running a Profiler against the given table.

    :param test_case: ColumnValuesToBeNotInSet, provides the forbidden `values`
    :param col_profile: profile of the column under test; only its `name` is used
    :param execution_date: Datetime when the tests ran
    :param session: SQLAlchemy Session, used to compute the new metric
    :param table: SQLAlchemy Table, used to compute the new metric
    :return: TestCaseResult with status and results. The status is Aborted
        if the metric could not be computed.
    """

    set_count = add_props(values=test_case.values)(Metrics.COUNT_IN_SET.value)

    try:
        # Pick the ORM column matching the profiled column name, if any
        col = next(
            (col for col in inspect(table).c if col.name == col_profile.name),
            None,
        )

        if col is None:
            raise ValueError(
                f"Cannot find the configured column {col_profile.name} for ColumnValuesToBeNotInSet"
            )

        res = (
            Profiler(set_count, session=session, table=table, use_cols=[col])
            .execute()
            .column_results
        )
        set_count_res = res.get(col.name)[Metrics.COUNT_IN_SET.name]

    except Exception as err:  # pylint: disable=broad-except
        # `session` is optional: only roll back when we actually have one,
        # so that a missing session still yields an Aborted result instead
        # of raising from inside this handler.
        if session is not None:
            session.rollback()
        msg = f"Error computing ColumnValuesToBeNotInSet for {col_profile.name} - {err}"
        logger.error(msg)
        return TestCaseResult(
            executionTime=execution_date.timestamp(),
            testCaseStatus=TestCaseStatus.Aborted,
            result=msg,
        )

    status = TestCaseStatus.Success if set_count_res == 0 else TestCaseStatus.Failed
    result = f"Found countInSet={set_count_res}. It should be 0."

    return TestCaseResult(
        executionTime=execution_date.timestamp(), testCaseStatus=status, result=result
    )
|
||||||
@ -29,6 +29,7 @@ def column_values_to_be_between(
|
|||||||
test_case: ColumnValuesToBeBetween,
|
test_case: ColumnValuesToBeBetween,
|
||||||
col_profile: ColumnProfile,
|
col_profile: ColumnProfile,
|
||||||
execution_date: datetime,
|
execution_date: datetime,
|
||||||
|
**__,
|
||||||
) -> TestCaseResult:
|
) -> TestCaseResult:
|
||||||
"""
|
"""
|
||||||
Validate Column Values metric
|
Validate Column Values metric
|
||||||
|
|||||||
@ -0,0 +1,58 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
ColumnValuesToBeNotNull validation implementation
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import ColumnProfile
|
||||||
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
||||||
|
from metadata.generated.schema.tests.column.columnValuesToBeNotNull import (
|
||||||
|
ColumnValuesToBeNotNull,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.utils import logger
|
||||||
|
|
||||||
|
logger = logger()
|
||||||
|
|
||||||
|
|
||||||
|
def column_values_to_be_not_null(
    _: ColumnValuesToBeNotNull,
    col_profile: ColumnProfile,
    execution_date: datetime,
    **__,
) -> TestCaseResult:
    """
    Validate that a column has no null values.

    :param _: ColumnValuesToBeNotNull. Its content is not read here
    :param col_profile: should contain the nullCount metric
    :param execution_date: Datetime when the tests ran
    :return: TestCaseResult with status and results. The status is Aborted
        if nullCount is missing from the profile.
    """

    null_count = col_profile.nullCount

    # Without the metric there is nothing to check against
    if null_count is None:
        msg = "We expect `nullCount` to be informed on the profiler for ColumnValuesToBeNotNull."
        logger.error(msg)
        return TestCaseResult(
            executionTime=execution_date.timestamp(),
            testCaseStatus=TestCaseStatus.Aborted,
            result=msg,
        )

    if null_count == 0:
        status = TestCaseStatus.Success
    else:
        status = TestCaseStatus.Failed

    result = f"Found nullCount={null_count}. It should be 0."

    return TestCaseResult(
        executionTime=execution_date.timestamp(),
        testCaseStatus=status,
        result=result,
    )
|
||||||
@ -29,6 +29,7 @@ def column_values_to_be_unique(
|
|||||||
_: ColumnValuesToBeUnique,
|
_: ColumnValuesToBeUnique,
|
||||||
col_profile: ColumnProfile,
|
col_profile: ColumnProfile,
|
||||||
execution_date: datetime,
|
execution_date: datetime,
|
||||||
|
**__,
|
||||||
) -> TestCaseResult:
|
) -> TestCaseResult:
|
||||||
"""
|
"""
|
||||||
Validate Column Values metric
|
Validate Column Values metric
|
||||||
|
|||||||
@ -24,12 +24,24 @@ from functools import singledispatch
|
|||||||
|
|
||||||
from metadata.generated.schema.tests.basic import TestCaseResult
|
from metadata.generated.schema.tests.basic import TestCaseResult
|
||||||
from metadata.orm_profiler.utils import logger
|
from metadata.orm_profiler.utils import logger
|
||||||
|
from metadata.orm_profiler.validations.column.column_values_not_in_set import (
|
||||||
|
column_values_not_in_set,
|
||||||
|
)
|
||||||
from metadata.orm_profiler.validations.column.column_values_to_be_between import (
|
from metadata.orm_profiler.validations.column.column_values_to_be_between import (
|
||||||
column_values_to_be_between,
|
column_values_to_be_between,
|
||||||
)
|
)
|
||||||
|
from metadata.orm_profiler.validations.column.column_values_to_be_not_null import (
|
||||||
|
column_values_to_be_not_null,
|
||||||
|
)
|
||||||
from metadata.orm_profiler.validations.column.column_values_to_be_unique import (
|
from metadata.orm_profiler.validations.column.column_values_to_be_unique import (
|
||||||
column_values_to_be_unique,
|
column_values_to_be_unique,
|
||||||
)
|
)
|
||||||
|
from metadata.orm_profiler.validations.table.table_column_count_to_equal import (
|
||||||
|
table_column_count_to_equal,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.validations.table.table_row_count_to_be_between import (
|
||||||
|
table_row_count_to_be_between,
|
||||||
|
)
|
||||||
from metadata.orm_profiler.validations.table.table_row_count_to_equal import (
|
from metadata.orm_profiler.validations.table.table_row_count_to_equal import (
|
||||||
table_row_count_to_equal,
|
table_row_count_to_equal,
|
||||||
)
|
)
|
||||||
@ -38,7 +50,7 @@ logger = logger()
|
|||||||
|
|
||||||
|
|
||||||
@singledispatch
|
@singledispatch
|
||||||
def validate(test_case, *args, **kwargs) -> TestCaseResult:
|
def validate(test_case, **kwargs) -> TestCaseResult:
|
||||||
"""
|
"""
|
||||||
Default function to validate test cases.
|
Default function to validate test cases.
|
||||||
|
|
||||||
@ -51,7 +63,13 @@ def validate(test_case, *args, **kwargs) -> TestCaseResult:
|
|||||||
|
|
||||||
# Table Tests
|
# Table Tests
|
||||||
validate.register(table_row_count_to_equal)
|
validate.register(table_row_count_to_equal)
|
||||||
|
validate.register(table_row_count_to_be_between)
|
||||||
|
validate.register(table_column_count_to_equal)
|
||||||
|
|
||||||
# Column Tests
|
# Column Tests
|
||||||
validate.register(column_values_to_be_between)
|
validate.register(column_values_to_be_between)
|
||||||
validate.register(column_values_to_be_unique)
|
validate.register(column_values_to_be_unique)
|
||||||
|
validate.register(column_values_to_be_not_null)
|
||||||
|
|
||||||
|
# Column Session Tests
|
||||||
|
validate.register(column_values_not_in_set)
|
||||||
|
|||||||
@ -0,0 +1,61 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
TableColumnCountToEqual validation implementation
|
||||||
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import TableProfile
|
||||||
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
||||||
|
from metadata.generated.schema.tests.table.tableColumnCountToEqual import (
|
||||||
|
TableColumnCountToEqual,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.utils import logger
|
||||||
|
|
||||||
|
logger = logger()
|
||||||
|
|
||||||
|
|
||||||
|
def table_column_count_to_equal(
    test_case: TableColumnCountToEqual,
    table_profile: TableProfile,
    execution_date: datetime,
    **__,
) -> TestCaseResult:
    """
    Validate column count metric
    :param test_case: TableColumnCountToEqual, carrying the expected `value`
    :param table_profile: should contain columnCount metric
    :param execution_date: Datetime when the tests ran
    :return: TestCaseResult with status and results
    """
    found = table_profile.columnCount

    # Guard clause: without the metric there is nothing to compare against,
    # so the test is reported as Aborted instead of Failed.
    if found is None:
        msg = "columnCount should not be None for TableColumnCountToEqual"
        logger.error(msg)
        return TestCaseResult(
            executionTime=execution_date.timestamp(),
            testCaseStatus=TestCaseStatus.Aborted,
            result=msg,
        )

    expected = test_case.value

    if found == expected:
        status = TestCaseStatus.Success
    else:
        status = TestCaseStatus.Failed

    return TestCaseResult(
        executionTime=execution_date.timestamp(),
        testCaseStatus=status,
        result=f"Found {found} columns vs. the expected {expected}",
    )
|
||||||
@ -0,0 +1,59 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
TableRowCountToBeBetween validation implementation
|
||||||
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import TableProfile
|
||||||
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
||||||
|
from metadata.generated.schema.tests.table.tableRowCountToBeBetween import (
|
||||||
|
TableRowCountToBeBetween,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.utils import logger
|
||||||
|
|
||||||
|
logger = logger()
|
||||||
|
|
||||||
|
|
||||||
|
def table_row_count_to_be_between(
    test_case: TableRowCountToBeBetween,
    table_profile: TableProfile,
    execution_date: datetime,
    **__,
) -> TestCaseResult:
    """
    Validate that the row count metric lies in the configured closed range
    :param test_case: TableRowCountToBeBetween, carrying minValue and maxValue
    :param table_profile: should contain row count metric
    :param execution_date: Datetime when the tests ran
    :return: TestCaseResult with status and results
    """
    rows = table_profile.rowCount

    # Guard clause: a profile without rowCount cannot be validated, so the
    # test is reported as Aborted instead of Failed.
    if rows is None:
        msg = "rowCount should not be None for TableRowCountToBeBetween"
        logger.error(msg)
        return TestCaseResult(
            executionTime=execution_date.timestamp(),
            testCaseStatus=TestCaseStatus.Aborted,
            result=msg,
        )

    # Both bounds are inclusive.
    if test_case.minValue <= rows <= test_case.maxValue:
        status = TestCaseStatus.Success
    else:
        status = TestCaseStatus.Failed

    result = f"Found {rows} rows vs. the expected range [{test_case.minValue}, {test_case.maxValue}]."

    return TestCaseResult(
        executionTime=execution_date.timestamp(),
        testCaseStatus=status,
        result=result,
    )
|
||||||
@ -28,6 +28,7 @@ def table_row_count_to_equal(
|
|||||||
test_case: TableRowCountToEqual,
|
test_case: TableRowCountToEqual,
|
||||||
table_profile: TableProfile,
|
table_profile: TableProfile,
|
||||||
execution_date: datetime,
|
execution_date: datetime,
|
||||||
|
**__,
|
||||||
) -> TestCaseResult:
|
) -> TestCaseResult:
|
||||||
"""
|
"""
|
||||||
Validate row count metric
|
Validate row count metric
|
||||||
|
|||||||
@ -128,7 +128,7 @@ class MetricsTest(TestCase):
|
|||||||
res = profiler.execute()._column_results
|
res = profiler.execute()._column_results
|
||||||
assert res.get(User.nickname.name).get(Metrics.NULL_RATIO.name) == 0.5
|
assert res.get(User.nickname.name).get(Metrics.NULL_RATIO.name) == 0.5
|
||||||
|
|
||||||
def test_table_count(self):
|
def test_table_row_count(self):
|
||||||
"""
|
"""
|
||||||
Check Table Metric run
|
Check Table Metric run
|
||||||
"""
|
"""
|
||||||
@ -137,6 +137,15 @@ class MetricsTest(TestCase):
|
|||||||
res = profiler.execute()._table_results
|
res = profiler.execute()._table_results
|
||||||
assert res.get(Metrics.ROW_COUNT.name) == 2
|
assert res.get(Metrics.ROW_COUNT.name) == 2
|
||||||
|
|
||||||
|
def test_table_column_count(self):
    """
    Check that the Column Count table metric reports 6 for the User table
    """
    metric = add_props(table=User)(Metrics.COLUMN_COUNT.value)
    table_results = (
        Profiler(metric, session=self.session, table=User).execute()._table_results
    )

    assert table_results.get(Metrics.COLUMN_COUNT.name) == 6
|
||||||
|
|
||||||
def test_avg(self):
|
def test_avg(self):
|
||||||
"""
|
"""
|
||||||
Check avg for distinct types
|
Check avg for distinct types
|
||||||
@ -456,3 +465,26 @@ class MetricsTest(TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert res.get(User.age.name)[Metrics.UNIQUE_RATIO.name] == 1.0
|
assert res.get(User.age.name)[Metrics.UNIQUE_RATIO.name] == 1.0
|
||||||
|
|
||||||
|
def test_count_in_set(self):
    """
    Check Count In Set metric on the name column for one and two matching values
    """
    # (values configured on the metric, expected number of matching rows),
    # run in the same order as the scenarios were originally written.
    scenarios = (
        (["John"], 1.0),
        (["John", "Jane"], 2.0),
    )

    for values, expected in scenarios:
        set_count = add_props(values=values)(Metrics.COUNT_IN_SET.value)
        profiler = Profiler(
            set_count, session=self.session, table=User, use_cols=[User.name]
        )
        res = profiler.execute()._column_results

        assert res.get(User.name.name)[Metrics.COUNT_IN_SET.name] == expected
|
||||||
|
|||||||
@ -59,7 +59,7 @@ class ProfilerTest(TestCase):
|
|||||||
cls.session.add_all(data)
|
cls.session.add_all(data)
|
||||||
cls.session.commit()
|
cls.session.commit()
|
||||||
|
|
||||||
def test_simple_profiler(self):
|
def test_default_profiler(self):
|
||||||
"""
|
"""
|
||||||
Check our pre-cooked profiler
|
Check our pre-cooked profiler
|
||||||
"""
|
"""
|
||||||
@ -69,6 +69,7 @@ class ProfilerTest(TestCase):
|
|||||||
profile = simple.get_profile()
|
profile = simple.get_profile()
|
||||||
|
|
||||||
assert profile.rowCount == 2
|
assert profile.rowCount == 2
|
||||||
|
assert profile.columnCount == 5
|
||||||
|
|
||||||
age_profile = next(
|
age_profile = next(
|
||||||
iter(
|
iter(
|
||||||
|
|||||||
126
ingestion/tests/unit/profiler/test_session_validations.py
Normal file
126
ingestion/tests/unit/profiler/test_session_validations.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
# Copyright 2021 Collate
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Test validations that need a session configured to run
|
||||||
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from sqlalchemy import TEXT, Column, Integer, String, create_engine
|
||||||
|
from sqlalchemy.orm import declarative_base
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.table import ColumnProfile
|
||||||
|
from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
||||||
|
from metadata.generated.schema.tests.column.columnValuesToBeNotInSet import (
|
||||||
|
ColumnValuesToBeNotInSet,
|
||||||
|
)
|
||||||
|
from metadata.orm_profiler.engines import create_and_bind_session
|
||||||
|
from metadata.orm_profiler.validations.core import validate
|
||||||
|
|
||||||
|
EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
|
||||||
|
class User(Base):
    """
    Declarative model for the `users` table that the session validations run against
    """

    __tablename__ = "users"
    # Integer primary key; the fixtures never set it explicitly.
    id = Column(Integer, primary_key=True)
    # Short text columns, capped at 256 characters.
    name = Column(String(256))
    fullname = Column(String(256))
    nickname = Column(String(256))
    # Free-form text column.
    comments = Column(TEXT)
    age = Column(Integer)
|
||||||
|
|
||||||
|
|
||||||
|
class MetricsTest(TestCase):
    """
    Run validations that need a session, against an in-memory SQLite users table
    """

    engine = create_engine("sqlite+pysqlite:///:memory:", echo=False, future=True)
    session = create_and_bind_session(engine)

    @classmethod
    def setUpClass(cls) -> None:
        """
        Create the users table and load two sample rows
        """
        User.__table__.create(bind=cls.engine)

        john = User(
            name="John",
            fullname="John Doe",
            nickname="johnny b goode",
            comments="no comments",
            age=30,
        )
        jane = User(
            name="Jane",
            fullname="Jone Doe",
            nickname=None,
            comments="maybe some comments",
            age=31,
        )

        cls.session.add_all([john, jane])
        cls.session.commit()

    def test_column_values_not_in_set(self):
        """
        Check the Success, Failed and Aborted outcomes of ColumnValuesToBeNotInSet
        """
        # Keyword arguments shared by every validate() call below.
        common = {
            "execution_date": EXECUTION_DATE,
            "session": self.session,
            "table": User,
        }
        name_profile = ColumnProfile(name="name")  # column name

        # None of the forbidden values appears in the column.
        res_ok = validate(
            ColumnValuesToBeNotInSet(values=["random", "forbidden"]),
            col_profile=name_profile,
            **common,
        )
        expected_ok = TestCaseResult(
            executionTime=EXECUTION_DATE.timestamp(),
            testCaseStatus=TestCaseStatus.Success,
            result="Found countInSet=0. It should be 0.",
        )
        assert res_ok == expected_ok

        # "John" is present in the column, so the validation must fail.
        res_ko = validate(
            ColumnValuesToBeNotInSet(values=["John", "forbidden"]),
            col_profile=name_profile,
            **common,
        )
        expected_ko = TestCaseResult(
            executionTime=EXECUTION_DATE.timestamp(),
            testCaseStatus=TestCaseStatus.Failed,
            result="Found countInSet=1. It should be 0.",
        )
        assert res_ko == expected_ko

        # The profile points at a column the table does not have.
        res_aborted = validate(
            ColumnValuesToBeNotInSet(values=["John", "forbidden"]),
            col_profile=ColumnProfile(name="random"),
            **common,
        )
        expected_aborted = TestCaseResult(
            executionTime=EXECUTION_DATE.timestamp(),
            testCaseStatus=TestCaseStatus.Aborted,
            result=(
                "Error computing ColumnValuesToBeNotInSet for random - Cannot find"
                + " the configured column random for ColumnValuesToBeNotInSet"
            ),
        )
        assert res_aborted == expected_aborted
|
||||||
@ -22,9 +22,18 @@ from metadata.generated.schema.tests.basic import TestCaseResult, TestCaseStatus
|
|||||||
from metadata.generated.schema.tests.column.columnValuesToBeBetween import (
|
from metadata.generated.schema.tests.column.columnValuesToBeBetween import (
|
||||||
ColumnValuesToBeBetween,
|
ColumnValuesToBeBetween,
|
||||||
)
|
)
|
||||||
|
from metadata.generated.schema.tests.column.columnValuesToBeNotNull import (
|
||||||
|
ColumnValuesToBeNotNull,
|
||||||
|
)
|
||||||
from metadata.generated.schema.tests.column.columnValuesToBeUnique import (
|
from metadata.generated.schema.tests.column.columnValuesToBeUnique import (
|
||||||
ColumnValuesToBeUnique,
|
ColumnValuesToBeUnique,
|
||||||
)
|
)
|
||||||
|
from metadata.generated.schema.tests.table.tableColumnCountToEqual import (
|
||||||
|
TableColumnCountToEqual,
|
||||||
|
)
|
||||||
|
from metadata.generated.schema.tests.table.tableRowCountToBeBetween import (
|
||||||
|
TableRowCountToBeBetween,
|
||||||
|
)
|
||||||
from metadata.generated.schema.tests.table.tableRowCountToEqual import (
|
from metadata.generated.schema.tests.table.tableRowCountToEqual import (
|
||||||
TableRowCountToEqual,
|
TableRowCountToEqual,
|
||||||
)
|
)
|
||||||
@ -82,6 +91,104 @@ def test_table_row_count_to_equal():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_table_row_count_to_be_between():
    """
    Check TableRowCountToBeBetween
    """
    profile_with_rows = TableProfile(
        profileDate=EXECUTION_DATE.strftime("%Y-%m-%d"),
        rowCount=100,
    )

    # rowCount inside the range -> Success
    in_range = validate(
        TableRowCountToBeBetween(minValue=20, maxValue=120),
        table_profile=profile_with_rows,
        execution_date=EXECUTION_DATE,
    )
    expected_in_range = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Success,
        result="Found 100.0 rows vs. the expected range [20, 120].",
    )
    assert in_range == expected_in_range

    # rowCount below the lower bound -> Failed
    out_of_range = validate(
        TableRowCountToBeBetween(minValue=120, maxValue=200),
        table_profile=profile_with_rows,
        execution_date=EXECUTION_DATE,
    )
    expected_out_of_range = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Failed,
        result="Found 100.0 rows vs. the expected range [120, 200].",
    )
    assert out_of_range == expected_out_of_range

    # Profile without rowCount -> Aborted
    profile_without_rows = TableProfile(
        profileDate=EXECUTION_DATE.strftime("%Y-%m-%d"),
    )
    aborted = validate(
        TableRowCountToBeBetween(minValue=120, maxValue=200),
        table_profile=profile_without_rows,
        execution_date=EXECUTION_DATE,
    )
    expected_aborted = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Aborted,
        result="rowCount should not be None for TableRowCountToBeBetween",
    )
    assert aborted == expected_aborted
|
||||||
|
|
||||||
|
|
||||||
|
def test_table_column_count_to_equal():
    """
    Check TableColumnCountToEqual
    """
    profile_with_columns = TableProfile(
        profileDate=EXECUTION_DATE.strftime("%Y-%m-%d"),
        columnCount=5,
    )

    # Expected value matches the profiled column count -> Success
    matching = validate(
        TableColumnCountToEqual(value=5),
        table_profile=profile_with_columns,
        execution_date=EXECUTION_DATE,
    )
    expected_matching = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Success,
        result="Found 5.0 columns vs. the expected 5",
    )
    assert matching == expected_matching

    # Expected value differs from the profiled column count -> Failed
    mismatching = validate(
        TableColumnCountToEqual(value=20),
        table_profile=profile_with_columns,
        execution_date=EXECUTION_DATE,
    )
    expected_mismatching = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Failed,
        result="Found 5.0 columns vs. the expected 20",
    )
    assert mismatching == expected_mismatching

    # Profile without columnCount -> Aborted
    profile_without_columns = TableProfile(
        profileDate=EXECUTION_DATE.strftime("%Y-%m-%d"),
    )
    aborted = validate(
        TableColumnCountToEqual(value=5),
        table_profile=profile_without_columns,
        execution_date=EXECUTION_DATE,
    )
    expected_aborted = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Aborted,
        result="columnCount should not be None for TableColumnCountToEqual",
    )
    assert aborted == expected_aborted
|
||||||
|
|
||||||
|
|
||||||
def test_column_values_to_be_between():
|
def test_column_values_to_be_between():
|
||||||
"""
|
"""
|
||||||
Check ColumnValuesToBeBetween
|
Check ColumnValuesToBeBetween
|
||||||
@ -204,3 +311,56 @@ def test_column_values_to_be_unique():
|
|||||||
+ " but got valuesCount=None, uniqueCount=None."
|
+ " but got valuesCount=None, uniqueCount=None."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_column_values_to_be_not_null():
    """
    Check ColumnValuesToBeNotNull for the Success, Failed and Aborted outcomes
    """
    # No nulls profiled -> Success
    profile_no_nulls = ColumnProfile(
        nullCount=0,
    )
    res_ok = validate(
        ColumnValuesToBeNotNull(),
        col_profile=profile_no_nulls,
        execution_date=EXECUTION_DATE,
    )
    expected_ok = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Success,
        result=("Found nullCount=0.0. It should be 0."),
    )
    assert res_ok == expected_ok

    # Some nulls profiled -> Failed
    profile_with_nulls = ColumnProfile(
        nullCount=10,
    )
    res_ko = validate(
        ColumnValuesToBeNotNull(),
        col_profile=profile_with_nulls,
        execution_date=EXECUTION_DATE,
    )
    expected_ko = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Failed,
        result=("Found nullCount=10.0. It should be 0."),
    )
    assert res_ko == expected_ko

    # nullCount missing from the profile -> Aborted
    empty_profile = ColumnProfile()
    res_aborted = validate(
        ColumnValuesToBeNotNull(),
        col_profile=empty_profile,
        execution_date=EXECUTION_DATE,
    )
    expected_aborted = TestCaseResult(
        executionTime=EXECUTION_DATE.timestamp(),
        testCaseStatus=TestCaseStatus.Aborted,
        result=(
            "We expect `nullCount` to be informed on the profiler for ColumnValuesToBeNotNull."
        ),
    )
    assert res_aborted == expected_aborted
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user