mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-14 20:17:05 +00:00
parent
f90f1301d9
commit
a15da7ec98
@ -275,6 +275,7 @@ dev = {
|
||||
# For publishing
|
||||
"twine",
|
||||
"build",
|
||||
*plugins["sample-data"],
|
||||
}
|
||||
|
||||
|
||||
|
@ -46,7 +46,10 @@ class BaseColumnValuesMissingCountValidator(BaseTestValidator):
|
||||
"""
|
||||
try:
|
||||
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
||||
null_res = self._run_results(Metrics.NULL_COUNT, column)
|
||||
null_res = self._run_results(
|
||||
Metrics.NULL_MISSING_COUNT,
|
||||
column,
|
||||
)
|
||||
except (ValueError, RuntimeError) as exc:
|
||||
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
||||
logger.debug(traceback.format_exc())
|
||||
|
@ -42,6 +42,7 @@ from metadata.profiler.metrics.static.min_length import MinLength
|
||||
from metadata.profiler.metrics.static.not_like_count import NotLikeCount
|
||||
from metadata.profiler.metrics.static.not_regexp_match_count import NotRegexCount
|
||||
from metadata.profiler.metrics.static.null_count import NullCount
|
||||
from metadata.profiler.metrics.static.null_missing_count import NullMissingCount
|
||||
from metadata.profiler.metrics.static.regexp_match_count import RegexCount
|
||||
from metadata.profiler.metrics.static.row_count import RowCount
|
||||
from metadata.profiler.metrics.static.stddev import StdDev
|
||||
@ -103,3 +104,6 @@ class Metrics(MetricRegistry):
|
||||
|
||||
# Hybrid Metrics
|
||||
HISTOGRAM = Histogram
|
||||
|
||||
# Missing Count
|
||||
NULL_MISSING_COUNT = NullMissingCount
|
||||
|
@ -0,0 +1,73 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Null and Missing (empty string) Count Metric definition
|
||||
"""
|
||||
# pylint: disable=duplicate-code
|
||||
|
||||
|
||||
from sqlalchemy import case, column
|
||||
|
||||
from metadata.generated.schema.configuration.profilerConfiguration import MetricType
|
||||
from metadata.profiler.metrics.core import StaticMetric, _label
|
||||
from metadata.profiler.orm.functions.sum import SumFn
|
||||
|
||||
|
||||
class NullMissingCount(StaticMetric):
    """
    NULL + Empty COUNT Metric

    Given a column, return the number of rows whose value is either NULL
    or the empty string.

    We are building a CASE WHEN structure:
    ```
    SUM(
        CASE
            WHEN <col> IS NULL THEN 1
            WHEN <col> = '' THEN 1
            ELSE 0
        END
    )
    ```
    """

    @classmethod
    def name(cls):
        """
        Returns the name of the metric.
        """
        # NOTE(review): this reuses the `nullCount` metric name rather than a
        # dedicated one -- confirm this is intended and does not collide with
        # the plain null-count metric when both are computed together.
        return MetricType.nullCount.value

    @property
    def metric_type(self):
        """
        Returns the type of the metric.
        """
        return int

    @_label
    def fn(self):
        """
        Returns the SQLAlchemy function for calculating the metric.

        A row contributes 1 to the sum when the column is NULL or equal to
        the empty string, and 0 otherwise.
        """
        # Build the column expression once and reuse it in both branches.
        col = column(self.col.name, self.col.type)
        return SumFn(
            case(
                [
                    (col.is_(None), 1),
                    (col == "", 1),
                ],
                else_=0,
            )
        )

    def df_fn(self, dfs=None):
        """
        Returns the pandas function for calculating the metric.

        Counts, across every dataframe in `dfs`, the rows whose value in the
        column is null or the empty string, so that the result agrees with
        the SQL expression built by `fn`.

        Args:
            dfs: iterable of pandas dataframes containing the column.
                `None` (the default) is treated as "no data".

        Returns:
            The total number of null or empty-string values found.
        """
        if dfs is None:
            # The signature allows calling without dataframes; there is
            # nothing to count in that case.
            return 0

        return sum(
            (df[self.col.name].isnull() | df[self.col.name].eq("")).sum()
            for df in dfs
        )
|
@ -199,7 +199,7 @@ class ProfilerSource(ProfilerSourceInterface):
|
||||
db_service: Optional[DatabaseService],
|
||||
) -> ProfilerInterface:
|
||||
"""Create sqlalchemy profiler interface"""
|
||||
from metadata.profiler.interface.profiler_interface_factory import (
|
||||
from metadata.profiler.interface.profiler_interface_factory import ( # pylint: disable=import-outside-toplevel
|
||||
profiler_interface_factory,
|
||||
)
|
||||
|
||||
|
@ -102,7 +102,7 @@ def create_sqlite_table():
|
||||
name="John",
|
||||
first_name="Jo",
|
||||
fullname="John Doe",
|
||||
nickname="johnny b goode",
|
||||
nickname="",
|
||||
age=30,
|
||||
inserted_date=datetime.today() - timedelta(days=i),
|
||||
),
|
||||
|
@ -66,8 +66,8 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
"COLUMN",
|
||||
(
|
||||
TestCaseResult,
|
||||
"0",
|
||||
"8",
|
||||
"14",
|
||||
TestCaseStatus.Failed,
|
||||
20.0,
|
||||
10.0,
|
||||
@ -216,9 +216,9 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
"COLUMN",
|
||||
(
|
||||
TestCaseResult,
|
||||
"10",
|
||||
"20",
|
||||
None,
|
||||
TestCaseStatus.Success,
|
||||
TestCaseStatus.Failed,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
@ -229,7 +229,7 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
"test_case_column_values_missing_count_to_be_equal_missing_values",
|
||||
"columnValuesMissingCount",
|
||||
"COLUMN",
|
||||
(TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
(TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
),
|
||||
(
|
||||
"test_case_column_values_not_in_set",
|
||||
|
Loading…
x
Reference in New Issue
Block a user