mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-27 09:55:36 +00:00
Fix Pandas Dimensionality checks
This commit is contained in:
parent
03ede24eba
commit
6ea9c2d8ed
@ -347,13 +347,14 @@ class BaseTestValidator(ABC):
|
|||||||
if failed_rows is None:
|
if failed_rows is None:
|
||||||
failed_rows = total_rows - passed_rows
|
failed_rows = total_rows - passed_rows
|
||||||
|
|
||||||
# Calculate percentages with rounding to 2 decimal places
|
# Calculate percentages - derive one from the other to ensure they sum to 100%
|
||||||
passed_rows_percentage = (
|
if total_rows > 0:
|
||||||
round(passed_rows / total_rows * 100, 2) if total_rows > 0 else 0
|
passed_rows_percentage = round(passed_rows / total_rows * 100, 2)
|
||||||
)
|
# Derive failed percentage to ensure sum equals 100%
|
||||||
failed_rows_percentage = (
|
failed_rows_percentage = round(100 - passed_rows_percentage, 2)
|
||||||
round(failed_rows / total_rows * 100, 2) if total_rows > 0 else 0
|
else:
|
||||||
)
|
passed_rows_percentage = 0
|
||||||
|
failed_rows_percentage = 0
|
||||||
|
|
||||||
# Convert dictionary to array of DimensionValue objects
|
# Convert dictionary to array of DimensionValue objects
|
||||||
dimension_values_array = [
|
dimension_values_array = [
|
||||||
|
@ -51,7 +51,7 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator):
|
|||||||
try:
|
try:
|
||||||
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
||||||
count = self._run_results(Metrics.COUNT, column)
|
count = self._run_results(Metrics.COUNT, column)
|
||||||
unique_count = self._get_unique_count(Metrics.UNIQUE_COUNT, column)
|
unique_count = self._get_unique_count(Metrics.DISTINCT_COUNT, column)
|
||||||
except (ValueError, RuntimeError) as exc:
|
except (ValueError, RuntimeError) as exc:
|
||||||
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
|
@ -143,19 +143,19 @@ class ColumnValuesToBeInSetValidator(
|
|||||||
{
|
{
|
||||||
"dimension": dimension_value,
|
"dimension": dimension_value,
|
||||||
"count_in_set": count_in_set,
|
"count_in_set": count_in_set,
|
||||||
"row_count": row_count,
|
|
||||||
DIMENSION_TOTAL_COUNT_KEY: row_count,
|
DIMENSION_TOTAL_COUNT_KEY: row_count,
|
||||||
DIMENSION_FAILED_COUNT_KEY: failed_count,
|
DIMENSION_FAILED_COUNT_KEY: failed_count,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Non-enum mode
|
# Non-enum mode: we only care about matches, not failures
|
||||||
|
# Following SQLAlchemy's logic exactly
|
||||||
results_data.append(
|
results_data.append(
|
||||||
{
|
{
|
||||||
"dimension": dimension_value,
|
"dimension": dimension_value,
|
||||||
"count_in_set": count_in_set,
|
"count_in_set": count_in_set,
|
||||||
DIMENSION_TOTAL_COUNT_KEY: count_in_set,
|
DIMENSION_TOTAL_COUNT_KEY: count_in_set, # Use count_in_set as total
|
||||||
DIMENSION_FAILED_COUNT_KEY: 0,
|
DIMENSION_FAILED_COUNT_KEY: 0, # Don't track failures in non-enum mode
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -184,14 +184,17 @@ class ColumnValuesToBeInSetValidator(
|
|||||||
# Extract metric values
|
# Extract metric values
|
||||||
count_in_set = int(row.get("count_in_set", 0))
|
count_in_set = int(row.get("count_in_set", 0))
|
||||||
|
|
||||||
|
# Follow SQLAlchemy's exact logic
|
||||||
if match_enum:
|
if match_enum:
|
||||||
total_count = int(row.get("row_count", 0))
|
# Enum mode: track actual totals and failures
|
||||||
|
total_count = int(row.get(DIMENSION_TOTAL_COUNT_KEY, 0))
|
||||||
failed_count = int(row.get(DIMENSION_FAILED_COUNT_KEY, 0))
|
failed_count = int(row.get(DIMENSION_FAILED_COUNT_KEY, 0))
|
||||||
matched = total_count - count_in_set == 0
|
matched = failed_count == 0 # All values must be in enum
|
||||||
else:
|
else:
|
||||||
total_count = count_in_set
|
# Non-enum mode: we only care about matches
|
||||||
failed_count = 0
|
matched = count_in_set > 0 # Pass if ANY values are in set
|
||||||
matched = count_in_set > 0
|
total_count = count_in_set # Use count_in_set as total
|
||||||
|
failed_count = 0 # Don't track failures
|
||||||
|
|
||||||
impact_score = float(row.get("impact_score", 0.0))
|
impact_score = float(row.get("impact_score", 0.0))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user