mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-18 11:07:41 +00:00
fix: compute overall and regex count in the same query (#16962)
This commit is contained in:
parent
b4dc4df811
commit
c8870a0f1c
@ -50,9 +50,8 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator):
|
|||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
||||||
count = self._run_results(Metrics.COUNT, column)
|
count, match_count = self._run_results(
|
||||||
match_count = self._run_results(
|
(Metrics.COUNT, Metrics.REGEX_COUNT), column, expression=regex
|
||||||
Metrics.REGEX_COUNT, column, expression=regex
|
|
||||||
)
|
)
|
||||||
except (ValueError, RuntimeError) as exc:
|
except (ValueError, RuntimeError) as exc:
|
||||||
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
||||||
@ -66,7 +65,7 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if self.test_case.computePassedFailedRowCount:
|
if self.test_case.computePassedFailedRowCount:
|
||||||
row_count = self.get_row_count()
|
row_count = count
|
||||||
else:
|
else:
|
||||||
row_count = None
|
row_count = None
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
Validator for column values to match regex test case
|
Validator for column values to match regex test case
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from metadata.data_quality.validations.column.base.columnValuesToMatchRegex import (
|
from metadata.data_quality.validations.column.base.columnValuesToMatchRegex import (
|
||||||
BaseColumnValuesToMatchRegexValidator,
|
BaseColumnValuesToMatchRegexValidator,
|
||||||
@ -42,15 +42,21 @@ class ColumnValuesToMatchRegexValidator(
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _run_results(
|
def _run_results(
|
||||||
self, metric: Metrics, column: SQALikeColumn, **kwargs
|
self, metric: Tuple[Metrics], column: SQALikeColumn, **kwargs
|
||||||
) -> Optional[int]:
|
) -> Tuple[Optional[int], Optional[int]]:
|
||||||
"""compute result of the test case
|
"""compute result of the test case
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
metric: metric
|
metric: metric
|
||||||
column: column
|
column: column
|
||||||
"""
|
"""
|
||||||
return self.run_dataframe_results(self.runner, metric, column, **kwargs)
|
res = {}
|
||||||
|
for mtr in metric:
|
||||||
|
res[mtr.name] = self.run_dataframe_results(
|
||||||
|
self.runner, mtr, column, **kwargs
|
||||||
|
)
|
||||||
|
|
||||||
|
return res.get(Metrics.COUNT.name), res.get(Metrics.REGEX_COUNT.name)
|
||||||
|
|
||||||
def compute_row_count(self, column: SQALikeColumn):
|
def compute_row_count(self, column: SQALikeColumn):
|
||||||
"""Compute row count for the given column
|
"""Compute row count for the given column
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
Validator for column values to match regex test case
|
Validator for column values to match regex test case
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from sqlalchemy import Column, inspect
|
from sqlalchemy import Column, inspect
|
||||||
from sqlalchemy.exc import CompileError, SQLAlchemyError
|
from sqlalchemy.exc import CompileError, SQLAlchemyError
|
||||||
@ -46,7 +46,9 @@ class ColumnValuesToMatchRegexValidator(
|
|||||||
inspect(self.runner.table).c,
|
inspect(self.runner.table).c,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_results(self, metric: Metrics, column: Column, **kwargs) -> Optional[int]:
|
def _run_results(
|
||||||
|
self, metric: Tuple[Metrics], column: Column, **kwargs
|
||||||
|
) -> Tuple[Optional[int], Optional[int]]:
|
||||||
"""compute result of the test case
|
"""compute result of the test case
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -54,15 +56,42 @@ class ColumnValuesToMatchRegexValidator(
|
|||||||
column: column
|
column: column
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return self.run_query_results(self.runner, metric, column, **kwargs)
|
regex_count = Metrics.REGEX_COUNT(column)
|
||||||
|
regex_count.expression = kwargs.get("expression")
|
||||||
|
regex_count_fn = regex_count.fn()
|
||||||
|
|
||||||
|
res = dict(
|
||||||
|
self.runner.dispatch_query_select_first(
|
||||||
|
Metrics.COUNT(column).fn(),
|
||||||
|
regex_count_fn,
|
||||||
|
)
|
||||||
|
)
|
||||||
except (CompileError, SQLAlchemyError) as err:
|
except (CompileError, SQLAlchemyError) as err:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Could not use `REGEXP` due to - {err}. Falling back to `LIKE`"
|
f"Could not use `REGEXP` due to - {err}. Falling back to `LIKE`"
|
||||||
)
|
)
|
||||||
return self.run_query_results(
|
regex_count = Metrics.LIKE_COUNT(column)
|
||||||
self.runner, Metrics.LIKE_COUNT, column, **kwargs
|
regex_count.expression = kwargs.get("expression")
|
||||||
|
regex_count_fn = regex_count.fn()
|
||||||
|
res = dict(
|
||||||
|
self.runner.dispatch_query_select_first(
|
||||||
|
Metrics.COUNT(column).fn(),
|
||||||
|
regex_count,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if not res:
|
||||||
|
# pylint: disable=line-too-long
|
||||||
|
raise ValueError(
|
||||||
|
f"\nQuery on table/column {column.name if column is not None else ''} returned None. Your table might be empty. "
|
||||||
|
"If you confirmed your table is not empty and are still seeing this message you can:\n"
|
||||||
|
"\t1. check the documentation: https://docs.open-metadata.org/v1.3.x/connectors/ingestion/workflows/data-quality/tests\n"
|
||||||
|
"\t2. reach out to the Collate team for support"
|
||||||
|
)
|
||||||
|
# pylint: enable=line-too-long
|
||||||
|
|
||||||
|
return res.get(Metrics.COUNT.name), res.get(regex_count.name())
|
||||||
|
|
||||||
def compute_row_count(self, column: Column):
|
def compute_row_count(self, column: Column):
|
||||||
"""Compute row count for the given column
|
"""Compute row count for the given column
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user