https://github.com/open-metadata/OpenMetadata.git
Remove useless comments
commit b94c1d39da (parent 6ea9c2d8ed)
@@ -343,20 +343,17 @@ class BaseTestValidator(ABC):
         """
         from metadata.generated.schema.tests.basic import DimensionValue

-        # Auto-calculate failed rows if not provided
         if failed_rows is None:
             failed_rows = total_rows - passed_rows

-        # Calculate percentages - derive one from the other to ensure they sum to 100%
+        # Derive one percentage from the other to ensure they sum to 100%
         if total_rows > 0:
             passed_rows_percentage = round(passed_rows / total_rows * 100, 2)
-            # Derive failed percentage to ensure sum equals 100%
             failed_rows_percentage = round(100 - passed_rows_percentage, 2)
         else:
             passed_rows_percentage = 0
             failed_rows_percentage = 0

-        # Convert dictionary to array of DimensionValue objects
         dimension_values_array = [
             DimensionValue(name=name, value=value)
             for name, value in dimension_values.items()
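Note: the surviving comment describes a real invariant. Rounding the passed and failed percentages independently is not guaranteed to sum to exactly 100.00 for every split, while deriving the failed side from the rounded passed side keeps the invariant by construction. A standalone sketch (illustrative, not part of the patch):

```python
# Check the invariant the kept comment describes: deriving
# failed_rows_percentage from the rounded passed_rows_percentage
# keeps the pair summing to 100 for every split.
for total_rows in range(1, 500):
    for passed_rows in range(total_rows + 1):
        passed_pct = round(passed_rows / total_rows * 100, 2)
        failed_pct = round(100 - passed_pct, 2)
        # One final round sidesteps float representation noise in the comparison.
        assert round(passed_pct + failed_pct, 2) == 100.0
```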
@@ -371,9 +368,7 @@ class BaseTestValidator(ABC):
             failedRows=failed_rows,
             passedRowsPercentage=passed_rows_percentage,
             failedRowsPercentage=failed_rows_percentage,
-            impactScore=round(impact_score, 4)
-            if impact_score is not None
-            else None,  # Round to 4 decimal places
+            impactScore=round(impact_score, 4) if impact_score is not None else None,
         )

         return dimension_result
@@ -107,15 +107,12 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
             List[DimensionResult]: List of dimension-specific test results
         """
         try:
-            # Get dimension columns from test case
             dimension_columns = self.test_case.dimensionColumns or []
             if not dimension_columns:
                 return []

-            # Get the column to validate (same as _run_validation)
             column: Union[SQALikeColumn, Column] = self._get_column_name()

-            # Get test parameters (same as _run_validation)
             allowed_values = self.get_test_case_param_value(
                 self.test_case.parameterValues,  # type: ignore
                 "allowedValues",
@@ -126,34 +123,27 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
                 self.test_case.parameterValues, "matchEnum"
             )

-            # Define the metrics to compute (same as _run_validation)
             metrics_to_compute = {
                 "count_in_set": Metrics.COUNT_IN_SET,
             }

-            # Add row count metric if match_enum is enabled
             if match_enum:
                 metrics_to_compute["row_count"] = Metrics.ROW_COUNT

-            # Store test parameters for child class
             test_params = {
                 "allowed_values": allowed_values,
                 "match_enum": match_enum,
             }

-            # Execute separate queries for each dimension column
             dimension_results = []
             for dimension_column in dimension_columns:
                 try:
-                    # Get dimension column object
                     dimension_col = self._get_column_name(dimension_column)

-                    # Execute dimensional query for this single dimension
                     single_dimension_results = self._execute_dimensional_query(
                         column, dimension_col, metrics_to_compute, test_params
                     )

-                    # Add to overall results list (now directly a list)
                     dimension_results.extend(single_dimension_results)

                 except Exception as exc:
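Note: the loop above queries each dimension column independently and keeps going when one column fails. A minimal sketch of that isolation pattern (the helper and column names are hypothetical stand-ins for `_execute_dimensional_query` and real dimensions):

```python
def run_dimensional_query(dimension_column: str) -> list[dict]:
    """Hypothetical stand-in for _execute_dimensional_query."""
    if dimension_column == "tier":
        raise ValueError("column not found")
    return [{"dimension": dimension_column, "passed": 10, "failed": 2}]

dimension_results = []
for dimension_column in ["region", "tier"]:
    try:
        # A failure in one dimension is logged and skipped, not fatal.
        dimension_results.extend(run_dimensional_query(dimension_column))
    except Exception as exc:
        print(f"Skipping dimension {dimension_column}: {exc}")

print(dimension_results)  # only the "region" results survive
```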
@@ -92,34 +92,26 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator):
             List[DimensionResult]: List of dimension-specific test results
         """
         try:
-            # Get dimension columns from test case
             dimension_columns = self.test_case.dimensionColumns or []
             if not dimension_columns:
                 return []

-            # Get the column to validate (same as _run_validation)
             column: Union[SQALikeColumn, Column] = self._get_column_name()

-            # Define the metrics to compute (same as _run_validation)
             metrics_to_compute = {
                 "count": Metrics.COUNT,
                 "unique_count": Metrics.UNIQUE_COUNT,
             }

-            # Execute separate queries for each dimension column
             dimension_results = []
             for dimension_column in dimension_columns:
                 try:
-                    # Get dimension column object
                     dimension_col = self._get_column_name(dimension_column)

-                    # Execute dimensional query for this single dimension
-                    # This will return results grouped by this dimension only
                     single_dimension_results = self._execute_dimensional_query(
                         column, dimension_col, metrics_to_compute
                     )

-                    # Add to overall results list (now directly a list)
                     dimension_results.extend(single_dimension_results)

                 except Exception as exc:
@@ -132,7 +124,6 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator):

         except Exception as exc:
             logger.warning(f"Error executing dimensional validation: {exc}")
-            # Return empty list on error (test continues without dimensions)
             return []

     @abstractmethod
@@ -60,13 +60,11 @@ class ColumnValuesToBeInSetValidator(
             SQALikeColumn: Column object
         """
         if column_name is None:
-            # Get the main column being validated (original behavior)
             return self.get_column_name(
                 self.test_case.entityLink.root,
                 self.runner,
             )
         else:
-            # Get a specific column by name (for dimension columns)
             return self.get_column_name(
                 column_name,
                 self.runner,
@@ -111,28 +109,21 @@ class ColumnValuesToBeInSetValidator(
         dimension_results = []

         try:
-            # Extract test parameters
             allowed_values = test_params["allowed_values"]
             match_enum = test_params["match_enum"]

-            # Get the dataframe
             dfs = self.runner if isinstance(self.runner, list) else [self.runner]
             df = dfs[0]

-            # Group by dimension column
             grouped = df.groupby(dimension_col.name, dropna=False)

-            # Prepare results dataframe
             results_data = []

             for dimension_value, group_df in grouped:
-                # Handle NULL values
                 if pd.isna(dimension_value):
                     dimension_value = DIMENSION_NULL_LABEL
                 else:
                     dimension_value = str(dimension_value)

-                # Calculate metrics for this group
                 count_in_set = group_df[column.name].isin(allowed_values).sum()

                 if match_enum:
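Note: the dataframe path groups by the dimension column with `dropna=False` so NULLs form their own bucket, then counts in-set values per group. A self-contained sketch with toy data (the value of `DIMENSION_NULL_LABEL` is assumed here):

```python
import pandas as pd

DIMENSION_NULL_LABEL = "NULL"  # assumed stand-in for the real constant
allowed_values = ["active", "inactive"]

df = pd.DataFrame(
    {"status": ["active", "bogus", "inactive"], "region": ["eu", "eu", None]}
)

# dropna=False keeps the NULL region as its own group.
for dimension_value, group_df in df.groupby("region", dropna=False):
    label = DIMENSION_NULL_LABEL if pd.isna(dimension_value) else str(dimension_value)
    count_in_set = int(group_df["status"].isin(allowed_values).sum())
    print(label, count_in_set)  # -> eu 1, NULL 1
```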
@@ -159,32 +150,25 @@ class ColumnValuesToBeInSetValidator(
                     }
                 )

-            # Create DataFrame with results
             results_df = pd.DataFrame(results_data)

             if not results_df.empty:
-                # Calculate impact scores
                 results_df = calculate_impact_score_pandas(
                     results_df,
                     failed_column=DIMENSION_FAILED_COUNT_KEY,
                     total_column=DIMENSION_TOTAL_COUNT_KEY,
                 )

-                # Aggregate Others
                 results_df = aggregate_others_pandas(
                     results_df,
                     dimension_column="dimension",
                     top_n=DEFAULT_TOP_DIMENSIONS,
                 )

-                # Process results into DimensionResult objects
                 for _, row in results_df.iterrows():
                     dimension_value = row["dimension"]

-                    # Extract metric values
                     count_in_set = int(row.get("count_in_set", 0))

-                    # Follow SQLAlchemy's exact logic
                     if match_enum:
                         # Enum mode: track actual totals and failures
                         total_count = int(row.get(DIMENSION_TOTAL_COUNT_KEY, 0))
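Note: `aggregate_others_pandas` is called with a dimension column and a `top_n`; conceptually it keeps the top-N dimension values and rolls the remainder into an "Others" bucket. A simplified stand-in, not the real implementation (ranking by failed count is an assumption):

```python
import pandas as pd

def aggregate_others_sketch(
    results_df: pd.DataFrame, dimension_column: str, top_n: int
) -> pd.DataFrame:
    """Assumed behavior of aggregate_others_pandas, for illustration only."""
    ranked = results_df.sort_values("failed_count", ascending=False)
    top, rest = ranked.head(top_n), ranked.iloc[top_n:]
    if rest.empty:
        return top.reset_index(drop=True)
    # Sum the numeric metrics of everything outside the top N.
    others = rest.drop(columns=[dimension_column]).sum(numeric_only=True)
    others[dimension_column] = "Others"
    return pd.concat([top, others.to_frame().T], ignore_index=True)

demo = pd.DataFrame(
    {"dimension": ["eu", "us", "apac"], "failed_count": [9, 4, 1], "total_count": [20, 10, 5]}
)
print(aggregate_others_sketch(demo, "dimension", top_n=2))
```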
@@ -198,7 +182,6 @@ class ColumnValuesToBeInSetValidator(

                     impact_score = float(row.get("impact_score", 0.0))

-                    # Create dimension result
                     dimension_result = self.get_dimension_result_object(
                         dimension_values={dimension_col.name: dimension_value},
                         test_case_status=self.get_test_case_status(matched),
@@ -60,13 +60,11 @@ class ColumnValuesToBeUniqueValidator(
             SQALikeColumn: column
         """
         if column_name is None:
-            # Get the main column being validated (original behavior)
             return self.get_column_name(
                 self.test_case.entityLink.root,
                 self.runner,
             )
         else:
-            # Get a specific column by name (for dimension columns)
             return self.get_column_name(
                 column_name,
                 self.runner,
@@ -106,24 +104,18 @@ class ColumnValuesToBeUniqueValidator(
         dimension_results = []

         try:
-            # Get the dataframe
             dfs = self.runner if isinstance(self.runner, list) else [self.runner]
             df = dfs[0]

-            # Group by dimension column
             grouped = df.groupby(dimension_col.name, dropna=False)

-            # Prepare results dataframe
             results_data = []

             for dimension_value, group_df in grouped:
-                # Handle NULL values
                 if pd.isna(dimension_value):
                     dimension_value = DIMENSION_NULL_LABEL
                 else:
                     dimension_value = str(dimension_value)

-                # Calculate metrics for this group
                 total_count = len(group_df)
                 unique_count = group_df[column.name].nunique()
                 duplicate_count = total_count - unique_count
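Note: per group, the uniqueness metrics reduce to `len`, `nunique`, and their difference. A toy example (data purely illustrative):

```python
import pandas as pd

df = pd.DataFrame({"user_id": [1, 1, 2, 3], "region": ["eu", "eu", "eu", "us"]})

for region, group_df in df.groupby("region", dropna=False):
    total_count = len(group_df)
    unique_count = group_df["user_id"].nunique()
    duplicate_count = total_count - unique_count  # failed rows for uniqueness
    print(region, total_count, unique_count, duplicate_count)
# eu 3 2 1
# us 1 1 0
```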
@@ -138,29 +130,23 @@ class ColumnValuesToBeUniqueValidator(
                     }
                 )

-            # Create DataFrame with results
             results_df = pd.DataFrame(results_data)

             if not results_df.empty:
-                # Calculate impact scores
                 results_df = calculate_impact_score_pandas(
                     results_df,
                     failed_column=DIMENSION_FAILED_COUNT_KEY,
                     total_column=DIMENSION_TOTAL_COUNT_KEY,
                 )

-                # Aggregate Others
                 results_df = aggregate_others_pandas(
                     results_df,
                     dimension_column="dimension",
                     top_n=DEFAULT_TOP_DIMENSIONS,
                 )

-                # Process results into DimensionResult objects
                 for _, row in results_df.iterrows():
                     dimension_value = row["dimension"]

-                    # Extract metric values
                     total_count = int(row.get("count", 0))
                     unique_count = int(row.get("unique_count", 0))
                     duplicate_count = int(row.get(DIMENSION_FAILED_COUNT_KEY, 0))
@@ -109,11 +109,9 @@ class ColumnValuesToBeInSetValidator(
         dimension_results = []

         try:
-            # Extract test parameters
             allowed_values = test_params["allowed_values"]
             match_enum = test_params["match_enum"]

-            # Build metric expressions dictionary
             metric_expressions = {}
             for metric_name, metric in metrics_to_compute.items():
                 metric_instance = metric.value(column)
@@ -121,7 +119,6 @@ class ColumnValuesToBeInSetValidator(
                 metric_instance.values = allowed_values
                 metric_expressions[metric_name] = metric_instance.fn()

-            # Add standardized keys for impact scoring
             from metadata.data_quality.validations.base_test_handler import (
                 DIMENSION_FAILED_COUNT_KEY,
                 DIMENSION_TOTAL_COUNT_KEY,
@@ -142,14 +139,11 @@ class ColumnValuesToBeInSetValidator(
                 ]
                 metric_expressions[DIMENSION_FAILED_COUNT_KEY] = func.literal(0)

-            # Execute with Others aggregation (always use CTEs for impact scoring)
             result_rows = self._execute_with_others_aggregation(
                 dimension_col, metric_expressions, DEFAULT_TOP_DIMENSIONS
             )

-            # Process results into DimensionResult objects
             for row in result_rows:
-                # Extract values using dictionary keys
                 from metadata.data_quality.validations.base_test_handler import (
                     DIMENSION_NULL_LABEL,
                 )
@@ -160,7 +154,6 @@ class ColumnValuesToBeInSetValidator(
                     else DIMENSION_NULL_LABEL
                 )

-                # Extract metric results - preserve original logic
                 count_in_set = row.get("count_in_set", 0) or 0

                 # PRESERVE ORIGINAL LOGIC: match_enum determines how we get total_count
@@ -178,7 +171,6 @@ class ColumnValuesToBeInSetValidator(

                 impact_score = row.get("impact_score", 0.0)

-                # Create dimension result using the helper method
                 dimension_result = self.get_dimension_result_object(
                     dimension_values={dimension_col.name: dimension_value},
                     test_case_status=self.get_test_case_status(matched),
@@ -191,18 +183,13 @@ class ColumnValuesToBeInSetValidator(
                     total_rows=total_count,
                     passed_rows=count_in_set,
                     failed_rows=failed_count if match_enum else None,
-                    impact_score=impact_score
-                    if match_enum
-                    else None,  # Only include impact score when we have full metrics
+                    impact_score=impact_score if match_enum else None,
                 )

-                # Add to results list
                 dimension_results.append(dimension_result)

         except Exception as exc:
-            # Use the same error handling pattern as _run_results
             logger.warning(f"Error executing dimensional query: {exc}")
             logger.debug("Full error details: ", exc_info=True)
-            # Return empty list on error (test continues without dimensions)

         return dimension_results
@@ -151,26 +151,21 @@ class ColumnValuesToBeUniqueValidator(
                 )
                 continue

-            # Add standardized keys for impact scoring
             from metadata.data_quality.validations.base_test_handler import (
                 DIMENSION_FAILED_COUNT_KEY,
                 DIMENSION_TOTAL_COUNT_KEY,
             )

-            # For uniqueness test: failed = total - unique (duplicates)
             metric_expressions[DIMENSION_TOTAL_COUNT_KEY] = metric_expressions["count"]
             metric_expressions[DIMENSION_FAILED_COUNT_KEY] = (
                 metric_expressions["count"] - metric_expressions["unique_count"]
             )

-            # Execute with Others aggregation (always use CTEs for impact scoring)
             result_rows = self._execute_with_others_aggregation(
                 dimension_col, metric_expressions, DEFAULT_TOP_DIMENSIONS
             )

-            # Process results into DimensionResult objects
             for row in result_rows:
-                # Extract values using dictionary keys
                 from metadata.data_quality.validations.base_test_handler import (
                     DIMENSION_NULL_LABEL,
                 )
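Note: on the SQLAlchemy side, the failed-count expression is literally the row count minus the distinct count, grouped per dimension value. A hedged sketch with a toy model (the table, columns, and session-free `select` are invented for illustration; in the validator the expressions come from `Metrics.COUNT` and `Metrics.UNIQUE_COUNT`):

```python
from sqlalchemy import Column, Integer, String, func, select
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Users(Base):  # toy table, purely illustrative
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer)
    region = Column(String)

count_expr = func.count(Users.user_id)
unique_expr = func.count(func.distinct(Users.user_id))

stmt = select(
    Users.region.label("dimension"),
    count_expr.label("count"),
    unique_expr.label("unique_count"),
    (count_expr - unique_expr).label("failed_count"),  # duplicates per dimension
).group_by(Users.region)

print(stmt)  # renders the GROUP BY query a backend would execute
```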
@@ -181,17 +176,13 @@ class ColumnValuesToBeUniqueValidator(
                     else DIMENSION_NULL_LABEL
                 )

-                # Extract metric results
                 total_count = row.get("count", 0) or 0
                 unique_count = row.get("unique_count", 0) or 0

-                # Calculate duplicate count (failed rows for uniqueness test)
                 duplicate_count = total_count - unique_count
                 matched = total_count == unique_count

                 impact_score = row.get("impact_score", 0.0)

-                # Create dimension result using the helper method
                 dimension_result = self.get_dimension_result_object(
                     dimension_values={dimension_col.name: dimension_value},
                     test_case_status=self.get_test_case_status(matched),