mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-26 09:22:14 +00:00
Fix Pandas Dimensionality checks
This commit is contained in:
parent
03ede24eba
commit
6ea9c2d8ed
@@ -347,13 +347,14 @@ class BaseTestValidator(ABC):
|
||||
if failed_rows is None:
|
||||
failed_rows = total_rows - passed_rows
|
||||
|
||||
# Calculate percentages with rounding to 2 decimal places
|
||||
passed_rows_percentage = (
|
||||
round(passed_rows / total_rows * 100, 2) if total_rows > 0 else 0
|
||||
)
|
||||
failed_rows_percentage = (
|
||||
round(failed_rows / total_rows * 100, 2) if total_rows > 0 else 0
|
||||
)
|
||||
# Calculate percentages - derive one from the other to ensure they sum to 100%
|
||||
if total_rows > 0:
|
||||
passed_rows_percentage = round(passed_rows / total_rows * 100, 2)
|
||||
# Derive failed percentage to ensure sum equals 100%
|
||||
failed_rows_percentage = round(100 - passed_rows_percentage, 2)
|
||||
else:
|
||||
passed_rows_percentage = 0
|
||||
failed_rows_percentage = 0
|
||||
|
||||
# Convert dictionary to array of DimensionValue objects
|
||||
dimension_values_array = [
|
||||
|
@@ -51,7 +51,7 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator):
|
||||
try:
|
||||
column: Union[SQALikeColumn, Column] = self._get_column_name()
|
||||
count = self._run_results(Metrics.COUNT, column)
|
||||
unique_count = self._get_unique_count(Metrics.UNIQUE_COUNT, column)
|
||||
unique_count = self._get_unique_count(Metrics.DISTINCT_COUNT, column)
|
||||
except (ValueError, RuntimeError) as exc:
|
||||
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
|
||||
logger.debug(traceback.format_exc())
|
||||
|
@@ -143,19 +143,19 @@ class ColumnValuesToBeInSetValidator(
|
||||
{
|
||||
"dimension": dimension_value,
|
||||
"count_in_set": count_in_set,
|
||||
"row_count": row_count,
|
||||
DIMENSION_TOTAL_COUNT_KEY: row_count,
|
||||
DIMENSION_FAILED_COUNT_KEY: failed_count,
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Non-enum mode
|
||||
# Non-enum mode: we only care about matches, not failures
|
||||
# Following SQLAlchemy's logic exactly
|
||||
results_data.append(
|
||||
{
|
||||
"dimension": dimension_value,
|
||||
"count_in_set": count_in_set,
|
||||
DIMENSION_TOTAL_COUNT_KEY: count_in_set,
|
||||
DIMENSION_FAILED_COUNT_KEY: 0,
|
||||
DIMENSION_TOTAL_COUNT_KEY: count_in_set, # Use count_in_set as total
|
||||
DIMENSION_FAILED_COUNT_KEY: 0, # Don't track failures in non-enum mode
|
||||
}
|
||||
)
|
||||
|
||||
@@ -184,14 +184,17 @@ class ColumnValuesToBeInSetValidator(
|
||||
# Extract metric values
|
||||
count_in_set = int(row.get("count_in_set", 0))
|
||||
|
||||
# Follow SQLAlchemy's exact logic
|
||||
if match_enum:
|
||||
total_count = int(row.get("row_count", 0))
|
||||
# Enum mode: track actual totals and failures
|
||||
total_count = int(row.get(DIMENSION_TOTAL_COUNT_KEY, 0))
|
||||
failed_count = int(row.get(DIMENSION_FAILED_COUNT_KEY, 0))
|
||||
matched = total_count - count_in_set == 0
|
||||
matched = failed_count == 0 # All values must be in enum
|
||||
else:
|
||||
total_count = count_in_set
|
||||
failed_count = 0
|
||||
matched = count_in_set > 0
|
||||
# Non-enum mode: we only care about matches
|
||||
matched = count_in_set > 0 # Pass if ANY values are in set
|
||||
total_count = count_in_set # Use count_in_set as total
|
||||
failed_count = 0 # Don't track failures
|
||||
|
||||
impact_score = float(row.get("impact_score", 0.0))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user