Fixes 19119: Enhance TableCustomSQLQueryValidator to support threshold operation (#20307)

This commit is contained in:
Ayush Shah 2025-03-27 13:11:56 +05:30 committed by GitHub
parent 653c878497
commit 7a3990f350
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 63 additions and 3 deletions

View File

@ -24,6 +24,7 @@ from metadata.generated.schema.tests.basic import (
TestCaseStatus,
TestResultValue,
)
from metadata.utils.helpers import evaluate_threshold
from metadata.utils.logger import test_suite_logger
logger = test_suite_logger()
@ -51,6 +52,10 @@ class BaseTableCustomSQLQueryValidator(BaseTestValidator):
str,
)
operator = self.get_test_case_param_value(
self.test_case.parameterValues, "operator", str, "<=" # type: ignore
)
threshold = self.get_test_case_param_value(
self.test_case.parameterValues, # type: ignore
"threshold",
@ -64,6 +69,7 @@ class BaseTableCustomSQLQueryValidator(BaseTestValidator):
Strategy,
)
operator = cast(str, operator) # satisfy mypy
sql_expression = cast(str, sql_expression) # satisfy mypy
threshold = cast(int, threshold) # satisfy mypy
strategy = cast(Strategy, strategy) # satisfy mypy
@ -81,7 +87,11 @@ class BaseTableCustomSQLQueryValidator(BaseTestValidator):
[TestResultValue(name=RESULT_ROW_COUNT, value=None)],
)
len_rows = rows if isinstance(rows, int) else len(rows)
if len_rows <= threshold:
if evaluate_threshold(
threshold,
operator,
len_rows,
):
status = TestCaseStatus.Success
result_value = len_rows
else:

View File

@ -527,3 +527,43 @@ def retry_with_docker_host(config: Optional[WorkflowSource] = None):
def get_query_hash(query: str) -> str:
    """Return the hexadecimal MD5 digest of ``query``.

    The query text is encoded with the default ``str.encode()`` codec
    (UTF-8) before hashing.
    """
    encoded_query = query.encode()
    hex_digest = hashlib.md5(encoded_query).hexdigest()
    return str(hex_digest)
def evaluate_threshold(threshold: int, operator: str, result: int) -> bool:
    """Check whether ``result <operator> threshold`` holds.

    Args:
        threshold: The value the result is compared against (right-hand
            operand of the comparison).
        operator: The comparison operator as a string; one of ``<``, ``<=``,
            ``>``, ``>=``, ``==`` or ``!=``. Surrounding whitespace is
            ignored. A missing operator (``None`` or empty string) defaults
            to ``<=``.
        result: The value to test (left-hand operand of the comparison).

    Returns:
        True if the result satisfies the comparison against the threshold,
        False otherwise.

    Raises:
        ValueError: If ``operator`` is not one of the supported operators.
    """
    # The ``operator`` parameter shadows the stdlib module name, hence the alias.
    import operator as op  # pylint: disable=import-outside-toplevel

    operators = {
        "<": op.lt,
        "<=": op.le,
        ">": op.gt,
        ">=": op.ge,
        "==": op.eq,
        "!=": op.ne,
    }

    # Only a *missing* operator falls back to "<="; an unknown one must not be
    # silently evaluated as "<=", since that could report a wrong test status.
    symbol = str(operator).strip() if operator else "<="

    op_func = operators.get(symbol)
    if op_func is None:
        raise ValueError(
            f"Invalid operator: {operator!r}. "
            "Allowed operators: <, >, <=, >=, ==, !="
        )

    return op_func(result, threshold)

View File

@ -18,7 +18,6 @@ No sample data is required beforehand
"""
import pytest
from ingestion.tests.integration.datalake.conftest import BUCKET_NAME
from metadata.generated.schema.entity.data.table import ColumnProfile, Table
from metadata.utils.time_utils import (
get_beginning_of_day_timestamp_mill,
@ -28,6 +27,8 @@ from metadata.workflow.classification import AutoClassificationWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
from metadata.workflow.workflow_output_handler import WorkflowResultStatus
from .conftest import BUCKET_NAME
@pytest.fixture(scope="class", autouse=True)
def before_each(run_ingestion):

View File

@ -13,11 +13,12 @@
import pytest
from ingestion.tests.integration.datalake.conftest import BUCKET_NAME
from metadata.generated.schema.entity.data.table import DataType, Table
from metadata.ingestion.ometa.models import EntityList
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from .conftest import BUCKET_NAME
class TestDatalake:
"""datalake profiler E2E test"""

View File

@ -21,6 +21,14 @@
"optionValues": ["ROWS", "COUNT"],
"required": false
},
{
"name": "operator",
"displayName": "Operator",
"description": "Operator to use to compare the result of the custom SQL query to the threshold.",
"dataType": "STRING",
"required": false,
"optionValues": ["==", ">", ">=", "<", "<=", "!="]
},
{
"name": "threshold",
"displayName": "Threshold",