mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-10-17 03:38:18 +00:00
fix: compute overall and regex count in the same query (#16962)
This commit is contained in:
parent
b4dc4df811
commit
c8870a0f1c
@ -50,9 +50,8 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator):
|
||||
)
|
||||
try:
|
||||
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
||||
count = self._run_results(Metrics.COUNT, column)
|
||||
match_count = self._run_results(
|
||||
Metrics.REGEX_COUNT, column, expression=regex
|
||||
count, match_count = self._run_results(
|
||||
(Metrics.COUNT, Metrics.REGEX_COUNT), column, expression=regex
|
||||
)
|
||||
except (ValueError, RuntimeError) as exc:
|
||||
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
||||
@ -66,7 +65,7 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator):
|
||||
)
|
||||
|
||||
if self.test_case.computePassedFailedRowCount:
|
||||
row_count = self.get_row_count()
|
||||
row_count = count
|
||||
else:
|
||||
row_count = None
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
Validator for column values to match regex test case
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from metadata.data_quality.validations.column.base.columnValuesToMatchRegex import (
|
||||
BaseColumnValuesToMatchRegexValidator,
|
||||
@ -42,15 +42,21 @@ class ColumnValuesToMatchRegexValidator(
|
||||
)
|
||||
|
||||
def _run_results(
|
||||
self, metric: Metrics, column: SQALikeColumn, **kwargs
|
||||
) -> Optional[int]:
|
||||
self, metric: Tuple[Metrics], column: SQALikeColumn, **kwargs
|
||||
) -> Tuple[Optional[int], Optional[int]]:
|
||||
"""compute result of the test case
|
||||
|
||||
Args:
|
||||
metric: metric
|
||||
column: column
|
||||
"""
|
||||
return self.run_dataframe_results(self.runner, metric, column, **kwargs)
|
||||
res = {}
|
||||
for mtr in metric:
|
||||
res[mtr.name] = self.run_dataframe_results(
|
||||
self.runner, mtr, column, **kwargs
|
||||
)
|
||||
|
||||
return res.get(Metrics.COUNT.name), res.get(Metrics.REGEX_COUNT.name)
|
||||
|
||||
def compute_row_count(self, column: SQALikeColumn):
|
||||
"""Compute row count for the given column
|
||||
|
@ -13,7 +13,7 @@
|
||||
Validator for column values to match regex test case
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from sqlalchemy import Column, inspect
|
||||
from sqlalchemy.exc import CompileError, SQLAlchemyError
|
||||
@ -46,7 +46,9 @@ class ColumnValuesToMatchRegexValidator(
|
||||
inspect(self.runner.table).c,
|
||||
)
|
||||
|
||||
def _run_results(self, metric: Metrics, column: Column, **kwargs) -> Optional[int]:
|
||||
def _run_results(
|
||||
self, metric: Tuple[Metrics], column: Column, **kwargs
|
||||
) -> Tuple[Optional[int], Optional[int]]:
|
||||
"""compute result of the test case
|
||||
|
||||
Args:
|
||||
@ -54,15 +56,42 @@ class ColumnValuesToMatchRegexValidator(
|
||||
column: column
|
||||
"""
|
||||
try:
|
||||
return self.run_query_results(self.runner, metric, column, **kwargs)
|
||||
regex_count = Metrics.REGEX_COUNT(column)
|
||||
regex_count.expression = kwargs.get("expression")
|
||||
regex_count_fn = regex_count.fn()
|
||||
|
||||
res = dict(
|
||||
self.runner.dispatch_query_select_first(
|
||||
Metrics.COUNT(column).fn(),
|
||||
regex_count_fn,
|
||||
)
|
||||
)
|
||||
except (CompileError, SQLAlchemyError) as err:
|
||||
logger.warning(
|
||||
f"Could not use `REGEXP` due to - {err}. Falling back to `LIKE`"
|
||||
)
|
||||
return self.run_query_results(
|
||||
self.runner, Metrics.LIKE_COUNT, column, **kwargs
|
||||
regex_count = Metrics.LIKE_COUNT(column)
|
||||
regex_count.expression = kwargs.get("expression")
|
||||
regex_count_fn = regex_count.fn()
|
||||
res = dict(
|
||||
self.runner.dispatch_query_select_first(
|
||||
Metrics.COUNT(column).fn(),
|
||||
regex_count,
|
||||
)
|
||||
)
|
||||
|
||||
if not res:
|
||||
# pylint: disable=line-too-long
|
||||
raise ValueError(
|
||||
f"\nQuery on table/column {column.name if column is not None else ''} returned None. Your table might be empty. "
|
||||
"If you confirmed your table is not empty and are still seeing this message you can:\n"
|
||||
"\t1. check the documentation: https://docs.open-metadata.org/v1.3.x/connectors/ingestion/workflows/data-quality/tests\n"
|
||||
"\t2. reach out to the Collate team for support"
|
||||
)
|
||||
# pylint: enable=line-too-long
|
||||
|
||||
return res.get(Metrics.COUNT.name), res.get(regex_count.name())
|
||||
|
||||
def compute_row_count(self, column: Column):
|
||||
"""Compute row count for the given column
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user