diff --git a/bootstrap/sql/migrations/native/1.3.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.3.0/mysql/postDataMigrationSQLScript.sql index 31e8e3d7a05..5cb9ff68c1c 100644 --- a/bootstrap/sql/migrations/native/1.3.0/mysql/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.3.0/mysql/postDataMigrationSQLScript.sql @@ -18,3 +18,17 @@ WHERE json -> '$.testCaseResult.testCaseFailureStatus' IS NOT NULL; UPDATE data_quality_data_time_series d SET json = JSON_REMOVE(json, '$.testCaseFailureStatus'); -- END: Incident Manager Migration + +-- Test Case passed/failed row level migration +UPDATE test_definition +SET json = JSON_SET(json, '$.supportsRowLevelPassedFailed', true) +WHERE name IN ( + 'columnValuesToBeUnique', + 'columnValueLengthsToBeBetween', + 'columnValuesToBeBetween', + 'columnValuesToBeInSet', + 'columnValuesToBeNotInSet', + 'columnValuesToBeNotNull', + 'columnValuesToMatchRegex', + 'columnValuesToNotMatchRegex' +); \ No newline at end of file diff --git a/bootstrap/sql/migrations/native/1.3.0/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.3.0/postgres/postDataMigrationSQLScript.sql index 9a8da76d669..97c79be5dfd 100644 --- a/bootstrap/sql/migrations/native/1.3.0/postgres/postDataMigrationSQLScript.sql +++ b/bootstrap/sql/migrations/native/1.3.0/postgres/postDataMigrationSQLScript.sql @@ -14,4 +14,18 @@ SET json = json::jsonb#-'{testCaseResult,testCaseFailureStatus}'; -- STEP 2: remove all `testCaseFailureStatus` field in test results UPDATE data_quality_data_time_series d SET json = json::jsonb#-'{testCaseFailureStatus}'; --- END: Incident Manager Migration \ No newline at end of file +-- END: Incident Manager Migration + +-- Test Case passed/failed row level migration +UPDATE test_definition +SET json = JSONB_SET(json, '{supportsRowLevelPassedFailed}', 'true', true) +WHERE name IN ( + 'columnValuesToBeUnique', + 'columnValueLengthsToBeBetween', + 'columnValuesToBeBetween', + 'columnValuesToBeInSet', + 'columnValuesToBeNotInSet', + 'columnValuesToBeNotNull', + 'columnValuesToMatchRegex', + 'columnValuesToNotMatchRegex' +); \ No newline at end of file diff --git a/ingestion/src/metadata/data_quality/validations/base_test_handler.py b/ingestion/src/metadata/data_quality/validations/base_test_handler.py index 3e423f55b6a..067a85f4c3c 100644 --- a/ingestion/src/metadata/data_quality/validations/base_test_handler.py +++ b/ingestion/src/metadata/data_quality/validations/base_test_handler.py @@ -85,12 +85,15 @@ class BaseTestValidator(ABC): pre_processed_value = pre_processor(value) return type_(pre_processed_value) - def get_test_case_result_object( + def get_test_case_result_object( # pylint: disable=too-many-arguments self, execution_date: Union[datetime, float], status: TestCaseStatus, result: str, test_result_value: List[TestResultValue], + row_count: Optional[int] = None, + failed_rows: Optional[int] = None, + passed_rows: Optional[int] = None, ) -> TestCaseResult: """Returns a TestCaseResult object with the given args @@ -102,7 +105,7 @@ class BaseTestValidator(ABC): Returns: TestCaseResult: """ - return TestCaseResult( + test_case_result = TestCaseResult( timestamp=execution_date, # type: ignore testCaseStatus=status, result=result, @@ -110,6 +113,22 @@ class BaseTestValidator(ABC): sampleData=None, ) + if (row_count is not None) and ( + # we'll need at least one of these to be not None to compute the other + (failed_rows is not None) + or (passed_rows is not None) + ): + passed_rows = passed_rows if 
passed_rows is not None else (row_count - failed_rows) # type: ignore + failed_rows = ( + failed_rows if failed_rows is not None else (row_count - passed_rows) + ) + test_case_result.passedRows = passed_rows + test_case_result.failedRows = failed_rows + test_case_result.passedRowsPercentage = (passed_rows / row_count) * 100 + test_case_result.failedRowsPercentage = (failed_rows / row_count) * 100 # type: ignore + + return test_case_result + def format_column_list(self, status: TestCaseStatus, cols: List): """Format column list based on the test status diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValueLengthsToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValueLengthsToBeBetween.py index e946f8d4db1..3dfd8663c12 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValueLengthsToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValueLengthsToBeBetween.py @@ -15,7 +15,7 @@ Validator for column value length to be between test case import traceback from abc import abstractmethod -from typing import Union +from typing import Tuple, Union from sqlalchemy import Column @@ -65,6 +65,13 @@ class BaseColumnValueLengthsToBeBetweenValidator(BaseTestValidator): min_bound = self.get_min_bound("minLength") max_bound = self.get_max_bound("maxLength") + if self.test_case.computePassedFailedRowCount: + row_count, failed_rows = self.compute_row_count( + column, min_bound, max_bound + ) + else: + row_count, failed_rows = None, None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(min_bound <= min_res and max_bound >= max_res), @@ -74,6 +81,8 @@ class BaseColumnValueLengthsToBeBetweenValidator(BaseTestValidator): TestResultValue(name=MIN, value=str(min_res)), TestResultValue(name=MAX, value=str(max_res)), ], + row_count=row_count, + failed_rows=failed_rows, ) @abstractmethod @@ -83,3 +92,31 @@ class BaseColumnValueLengthsToBeBetweenValidator(BaseTestValidator): @abstractmethod def _run_results(self, metric: Metrics, column: Union[SQALikeColumn, Column]): raise NotImplementedError + + @abstractmethod + def compute_row_count( + self, column: Union[SQALikeColumn, Column], min_bound, max_bound + ): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self, min_bound, max_bound) -> Tuple[int, int]: + """Get row count + + Args: + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name(), min_bound, max_bound) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeBetween.py index f4dd54bf38b..c436c2dd7bf 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeBetween.py @@ -16,7 +16,7 @@ Validator for column values to be between test case import traceback from abc import abstractmethod from datetime import date, datetime, time -from typing 
import Union +from typing import Tuple, Union from sqlalchemy import Column @@ -100,6 +100,11 @@ class BaseColumnValuesToBeBetweenValidator(BaseTestValidator): pre_processor=convert_timestamp if is_date_time(column.type) else None, ) + if self.test_case.computePassedFailedRowCount: + row_count, failed_rows = self.get_row_count(min_bound, max_bound) + else: + row_count, failed_rows = None, None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(min_res >= min_bound and max_res <= max_bound), @@ -108,6 +113,8 @@ class BaseColumnValuesToBeBetweenValidator(BaseTestValidator): TestResultValue(name=MIN, value=str(min_res)), TestResultValue(name=MAX, value=str(max_res)), ], + row_count=row_count, + failed_rows=failed_rows, ) @abstractmethod @@ -117,3 +124,31 @@ class BaseColumnValuesToBeBetweenValidator(BaseTestValidator): @abstractmethod def _run_results(self, metric: Metrics, column: Union[SQALikeColumn, Column]): raise NotImplementedError + + @abstractmethod + def compute_row_count( + self, column: Union[SQALikeColumn, Column], min_bound, max_bound + ): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self, min_bound, max_bound) -> Tuple[int, int]: + """Get row count + + Args: + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name(), min_bound, max_bound) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py index ed6a5b1a00d..da732bf5588 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeInSet.py @@ -64,11 +64,18 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator): [TestResultValue(name=ALLOWED_VALUE_COUNT, value=None)], ) + if self.test_case.computePassedFailedRowCount: + row_count = self.get_row_count() + else: + row_count = None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(res >= 1), f"Found countInSet={res}.", [TestResultValue(name=ALLOWED_VALUE_COUNT, value=str(res))], + row_count=row_count, + passed_rows=res, ) @abstractmethod @@ -80,3 +87,23 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator): self, metric: Metrics, column: Union[SQALikeColumn, Column], **kwargs ): raise NotImplementedError + + @abstractmethod + def compute_row_count(self, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self) -> int: + """Get row count + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name()) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotInSet.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotInSet.py index d1c07f6fdde..bff9d0bd628 100644 --- 
a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotInSet.py @@ -70,11 +70,18 @@ class BaseColumnValuesToBeNotInSetValidator(BaseTestValidator): [TestResultValue(name=COUNT_FORBIDDEN_VALUES, value=None)], ) + if self.test_case.computePassedFailedRowCount: + row_count = self.get_row_count() + else: + row_count = None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(res == 0), f"Found countInSet={res}. It should be 0", [TestResultValue(name=COUNT_FORBIDDEN_VALUES, value=str(res))], + row_count=row_count, + failed_rows=res, ) @abstractmethod @@ -86,3 +93,23 @@ class BaseColumnValuesToBeNotInSetValidator(BaseTestValidator): self, metric: Metrics, column: Union[SQALikeColumn, Column], **kwargs ): raise NotImplementedError + + @abstractmethod + def compute_row_count(self, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self) -> int: + """Get row count + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name()) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotNull.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotNull.py index b9cf74dfdb4..d68587c3b1d 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotNull.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeNotNull.py @@ -57,11 +57,18 @@ class BaseColumnValuesToBeNotNullValidator(BaseTestValidator): [TestResultValue(name=NULL_COUNT, value=None)], ) + if self.test_case.computePassedFailedRowCount: + row_count = self.get_row_count() + else: + row_count = None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(res == 0), f"Found nullCount={res}. 
It should be 0", [TestResultValue(name=NULL_COUNT, value=str(res))], + row_count=row_count, + failed_rows=res, ) @abstractmethod @@ -71,3 +78,23 @@ class BaseColumnValuesToBeNotNullValidator(BaseTestValidator): @abstractmethod def _run_results(self, metric: Metrics, column: Union[SQALikeColumn, Column]): raise NotImplementedError + + @abstractmethod + def compute_row_count(self, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self) -> int: + """Get row count + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name()) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py index 66bf1148309..b0483b7d1ef 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToBeUnique.py @@ -71,6 +71,8 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator): TestResultValue(name=VALUE_COUNT, value=str(count)), TestResultValue(name=UNIQUE_COUNT, value=str(unique_count)), ], + row_count=count, + passed_rows=unique_count, ) @abstractmethod @@ -79,8 +81,21 @@ class BaseColumnValuesToBeUniqueValidator(BaseTestValidator): @abstractmethod def _run_results(self, metric: Metrics, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ raise NotImplementedError @abstractmethod def _get_unique_count(self, metric: Metrics, column: Union[SQALikeColumn, Column]): + """Get row count + + Returns: + Tuple[int, int]: + """ raise NotImplementedError diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToMatchRegex.py index dfcfd1a0983..d52d6bb3d94 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToMatchRegex.py @@ -65,11 +65,18 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator): [TestResultValue(name=LIKE_COUNT, value=None)], ) + if self.test_case.computePassedFailedRowCount: + row_count = self.get_row_count() + else: + row_count = None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(count == match_count), f"Found {match_count} value(s) matching regex pattern vs {count} value(s) in the column.", [TestResultValue(name=LIKE_COUNT, value=str(match_count))], + row_count=row_count, + passed_rows=match_count, ) @abstractmethod @@ -81,3 +88,23 @@ class BaseColumnValuesToMatchRegexValidator(BaseTestValidator): self, metric: Metrics, column: Union[SQALikeColumn, Column], **kwargs ): raise NotImplementedError + + @abstractmethod + def compute_row_count(self, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self) -> int: + """Get row count + + Returns: + Tuple[int, int]: + """ + return 
self.compute_row_count(self._get_column_name()) diff --git a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToNotMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToNotMatchRegex.py index afc13ff6436..3980bfc765e 100644 --- a/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToNotMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/base/columnValuesToNotMatchRegex.py @@ -64,12 +64,19 @@ class BaseColumnValuesToNotMatchRegexValidator(BaseTestValidator): [TestResultValue(name=NOT_LIKE_COUNT, value=None)], ) + if self.test_case.computePassedFailedRowCount: + row_count = self.get_row_count() + else: + row_count = None + return self.get_test_case_result_object( self.execution_date, self.get_test_case_status(not not_match_count), f"Found {not_match_count} value(s) matching the forbidden regex pattern vs " f"{not_match_count} value(s) in the column.", [TestResultValue(name=NOT_LIKE_COUNT, value=str(not_match_count))], + row_count=row_count, + failed_rows=not_match_count, ) @abstractmethod @@ -81,3 +88,23 @@ class BaseColumnValuesToNotMatchRegexValidator(BaseTestValidator): self, metric: Metrics, column: Union[SQALikeColumn, Column], **kwargs ): raise NotImplementedError + + @abstractmethod + def compute_row_count(self, column: Union[SQALikeColumn, Column]): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + raise NotImplementedError + + def get_row_count(self) -> int: + """Get row count + + Returns: + Tuple[int, int]: + """ + return self.compute_row_count(self._get_column_name()) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py index 82e62460611..27093d71950 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py @@ -50,3 +50,26 @@ class ColumnValueLengthsToBeBetweenValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column) + + def compute_row_count(self, column: SQALikeColumn, min_bound: int, max_bound: int): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + row_count = self._compute_row_count(self.runner, column) + failed_rows = sum( + len( + runner.query( + f"`{column.name}`.str.len() > {max_bound} or `{column.name}`.str.len() < {min_bound}" + ) + ) + for runner in self.runner # type: ignore + ) + + return row_count, failed_rows diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py index fa3c9c5f5c6..61c33c077d9 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py @@ -49,3 +49,26 @@ class ColumnValuesToBeBetweenValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column) + + 
def compute_row_count(self, column: SQALikeColumn, min_bound: int, max_bound: int): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + row_count = self._compute_row_count(self.runner, column) + failed_rows = sum( + len( + runner.query( + f"`{column.name}` > {max_bound} or `{column.name}` < {min_bound}" + ) + ) + for runner in self.runner # type: ignore + ) + + return row_count, failed_rows diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py index 06376182b5c..6048be95381 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py @@ -54,3 +54,14 @@ class ColumnValuesToBeInSetValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: SQALikeColumn): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py index c9c74f65949..004547c509a 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py @@ -51,3 +51,14 @@ class ColumnValuesToBeNotInSetValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: SQALikeColumn): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py index 226dbd1d8ac..60da0a2f77d 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py @@ -49,3 +49,14 @@ class ColumnValuesToBeNotNullValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column) + + def compute_row_count(self, column: SQALikeColumn): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py index c7fa2e1d5d0..f5bfe8e2ac7 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py +++ 
b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py @@ -51,3 +51,14 @@ class ColumnValuesToMatchRegexValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: SQALikeColumn): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py index ce95c852a91..26ec50f3b59 100644 --- a/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py @@ -51,3 +51,14 @@ class ColumnValuesToNotMatchRegexValidator( column: column """ return self.run_dataframe_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: SQALikeColumn): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py index 788f9ac1fe4..72f51fafae2 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py @@ -25,6 +25,7 @@ from metadata.data_quality.validations.mixins.sqa_validator_mixin import ( SQAValidatorMixin, ) from metadata.profiler.metrics.registry import Metrics +from metadata.profiler.orm.functions.length import LenFn class ColumnValueLengthsToBeBetweenValidator( @@ -51,3 +52,29 @@ class ColumnValueLengthsToBeBetweenValidator( column: column """ return self.run_query_results(self.runner, metric, column) + + def compute_row_count(self, column: Column, min_bound: int, max_bound: int): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + row_count = self._compute_row_count(self.runner, column) + failed_rows = self._compute_row_count_between( + self.runner, + column, + { + "filters": [ + (LenFn(column), "gt", max_bound), + (LenFn(column), "lt", min_bound), + ], + "or_filter": True, + }, + ) + + return row_count, failed_rows diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py index 3141c917cda..d844263c995 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py @@ -50,3 +50,26 @@ class ColumnValuesToBeBetweenValidator( column: column """ return self.run_query_results(self.runner, metric, column) + + def compute_row_count(self, 
column: Column, min_bound: int, max_bound: int): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + min_bound (_type_): min bound to filter out rows within the bound + max_bound (_type_): max bound to filter out rows within the bound + + Raises: + NotImplementedError: + """ + row_count = self._compute_row_count(self.runner, column) + failed_rows = self._compute_row_count_between( + self.runner, + column, + { + "filters": [(column, "gt", max_bound), (column, "lt", min_bound)], + "or_filter": True, + }, + ) + + return row_count, failed_rows diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py index f1b5cf9b3cf..2bd603b46b5 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py @@ -50,3 +50,14 @@ class ColumnValuesToBeInSetValidator( column: column """ return self.run_query_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: Column): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py index c2486bca481..58bbeddf305 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py @@ -50,3 +50,14 @@ class ColumnValuesToBeNotInSetValidator( column: column """ return self.run_query_results(self.runner, metric, column, **kwargs) + + def compute_row_count(self, column: Column): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py index 94b99e5f2c6..cffa2287126 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py @@ -53,3 +53,14 @@ class ColumnValuesToBeNotNullValidator( column: column """ return self.run_query_results(self.runner, metric, column) + + def compute_row_count(self, column: Column): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py index 3c79591454b..8dbf3c141c0 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +++ 
b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py @@ -62,3 +62,14 @@ class ColumnValuesToMatchRegexValidator( return self.run_query_results( self.runner, Metrics.LIKE_COUNT, column, **kwargs ) + + def compute_row_count(self, column: Column): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py index 52f0383f6f5..9cce7cbcdac 100644 --- a/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py +++ b/ingestion/src/metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py @@ -62,3 +62,14 @@ class ColumnValuesToNotMatchRegexValidator( return self.run_query_results( self.runner, Metrics.NOT_LIKE_COUNT, column, **kwargs ) + + def compute_row_count(self, column: Column): + """Compute row count for the given column + + Args: + column (Union[SQALikeColumn, Column]): column to compute row count for + + Raises: + NotImplementedError: + """ + return self._compute_row_count(self.runner, column) diff --git a/ingestion/src/metadata/data_quality/validations/mixins/pandas_validator_mixin.py b/ingestion/src/metadata/data_quality/validations/mixins/pandas_validator_mixin.py index 3ab7aed4a3c..227d490349f 100644 --- a/ingestion/src/metadata/data_quality/validations/mixins/pandas_validator_mixin.py +++ b/ingestion/src/metadata/data_quality/validations/mixins/pandas_validator_mixin.py @@ -59,3 +59,12 @@ class PandasValidatorMixin: return metric_fn(runner) except Exception as exc: raise RuntimeError(exc) + + def _compute_row_count(self, runner, column: SQALikeColumn, **kwargs): + """compute row count + + Args: + runner (List[DataFrame]): runner to run the test case against) + column (SQALikeColumn): column to compute row count for + """ + return self.run_dataframe_results(runner, Metrics.ROW_COUNT, column, **kwargs) diff --git a/ingestion/src/metadata/data_quality/validations/mixins/sqa_validator_mixin.py b/ingestion/src/metadata/data_quality/validations/mixins/sqa_validator_mixin.py index ddae130f2ce..19f0dcd7fa6 100644 --- a/ingestion/src/metadata/data_quality/validations/mixins/sqa_validator_mixin.py +++ b/ingestion/src/metadata/data_quality/validations/mixins/sqa_validator_mixin.py @@ -83,3 +83,43 @@ class SQAValidatorMixin: ) return res + + def _compute_row_count_between( + self, + runner: QueryRunner, + column: Column, + query_filter: dict, + ): + """compute row count for between tests + + Args: + runner (QueryRunner): runner + column (Column): column + query_filter (dict): filter to apply to the query + + Raises: + SQLAlchemyError: + + Returns: + """ + try: + value = dict( + runner.dispatch_query_select_first( + Metrics.ROW_COUNT(column).fn(), + query_filter_=query_filter, + ) + ) + res = value.get(Metrics.ROW_COUNT.name) + except Exception as exc: + raise SQLAlchemyError(exc) + + return res + + def _compute_row_count(self, runner: QueryRunner, column: Column, **kwargs): + """compute row count + + Args: + runner (QueryRunner): runner to run the test case against) + column (SQALikeColumn): column to compute row count for + """ + return self.run_query_results(runner, Metrics.ROW_COUNT, column, **kwargs) diff --git 
a/ingestion/src/metadata/ingestion/source/database/mssql/utils.py b/ingestion/src/metadata/ingestion/source/database/mssql/utils.py index 3b1caed19ec..0a8fb85377a 100644 --- a/ingestion/src/metadata/ingestion/source/database/mssql/utils.py +++ b/ingestion/src/metadata/ingestion/source/database/mssql/utils.py @@ -327,29 +327,33 @@ def get_pk_constraint( This function overrides to get pk constraint """ pkeys = [] - tc = ischema.constraints - c = ischema.key_constraints.alias("C") + tc_ = ischema.constraints + c_key_constaint = ischema.key_constraints.alias("C") # Primary key constraints - s = ( - sql.select(c.c.column_name, tc.c.constraint_type, c.c.constraint_name) + query_ = ( + sql.select( + c_key_constaint.c.column_name, + tc_.c.constraint_type, + c_key_constaint.c.constraint_name, + ) .where( sql.and_( - tc.c.constraint_name == c.c.constraint_name, - tc.c.table_schema == c.c.table_schema, - c.c.table_name == tablename, - c.c.table_schema == owner, + tc_.c.constraint_name == c_key_constaint.c.constraint_name, + tc_.c.table_schema == c_key_constaint.c.table_schema, + c_key_constaint.c.table_name == tablename, + c_key_constaint.c.table_schema == owner, ), ) - .order_by(tc.c.constraint_name, c.c.ordinal_position) + .order_by(tc_.c.constraint_name, c_key_constaint.c.ordinal_position) ) - cursor = connection.execution_options(future_result=True).execute(s) + cursor = connection.execution_options(future_result=True).execute(query_) constraint_name = None for row in cursor.mappings(): - if "PRIMARY" in row[tc.c.constraint_type.name]: + if "PRIMARY" in row[tc_.c.constraint_type.name]: pkeys.append(row["COLUMN_NAME"]) if constraint_name is None: - constraint_name = row[c.c.constraint_name.name] + constraint_name = row[c_key_constaint.c.constraint_name.name] return {"constrained_columns": pkeys, "name": constraint_name} @@ -366,7 +370,7 @@ def get_foreign_keys( """ This function overrides to get foreign key constraint """ - s = ( + query_ = ( text(MSSQL_GET_FOREIGN_KEY) .bindparams( sql.bindparam("tablename", tablename, ischema.CoerceUnicode()), @@ -399,7 +403,7 @@ def get_foreign_keys( fkeys = util.defaultdict(fkey_rec) - for r in connection.execute(s).fetchall(): + for row_ in connection.execute(query_).fetchall(): ( _, # constraint schema rfknm, @@ -414,7 +418,7 @@ def get_foreign_keys( _, # match rule fkuprule, fkdelrule, - ) = r + ) = row_ rec = fkeys[rfknm] rec["name"] = rfknm @@ -449,7 +453,7 @@ def get_table_names( self, connection, dbname, owner, schema, **kw ): # pylint: disable=unused-argument tables = ischema.tables - s = ( + query_ = ( sql.select(tables.c.table_name) .where( sql.and_( @@ -459,7 +463,7 @@ def get_table_names( ) .order_by(tables.c.table_name) ) - table_names = [r[0] for r in connection.execute(s)] + table_names = [r[0] for r in connection.execute(query_)] return table_names @@ -469,7 +473,7 @@ def get_view_names( self, connection, dbname, owner, schema, **kw ): # pylint: disable=unused-argument tables = ischema.tables - s = ( + query_ = ( sql.select(tables.c.table_name) .where( sql.and_( @@ -479,5 +483,5 @@ def get_view_names( ) .order_by(tables.c.table_name) ) - view_names = [r[0] for r in connection.execute(s)] + view_names = [r[0] for r in connection.execute(query_)] return view_names diff --git a/ingestion/src/metadata/ingestion/source/pipeline/airflow/lineage_parser.py b/ingestion/src/metadata/ingestion/source/pipeline/airflow/lineage_parser.py index 8e74577845a..e9421ab5031 100644 --- a/ingestion/src/metadata/ingestion/source/pipeline/airflow/lineage_parser.py +++ 
b/ingestion/src/metadata/ingestion/source/pipeline/airflow/lineage_parser.py @@ -135,16 +135,16 @@ class XLets(BaseModel): def concat_dict_values( - d1: DefaultDict[str, List[Any]], d2: Optional[Dict[str, List[Any]]] + dict_1: DefaultDict[str, List[Any]], dict_2: Optional[Dict[str, List[Any]]] ) -> DefaultDict[str, List[Any]]: """ Update d1 based on d2 values concatenating their results. """ - if d2: - for key, value in d2.items(): - d1[key] = d1[key] + value + if dict_2: + for key, value in dict_2.items(): + dict_1[key] = dict_1[key] + value - return d1 + return dict_1 def parse_xlets(xlet: List[Any]) -> Optional[Dict[str, List[OMEntity]]]: diff --git a/ingestion/tests/unit/test_suite/conftest.py b/ingestion/tests/unit/test_suite/conftest.py index 600f45b049e..a2c906cde98 100644 --- a/ingestion/tests/unit/test_suite/conftest.py +++ b/ingestion/tests/unit/test_suite/conftest.py @@ -144,6 +144,7 @@ def test_case_column_value_length_to_be_between(): TestCaseParameterValue(name="minLength", value="1"), TestCaseParameterValue(name="maxLength", value="10"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -159,6 +160,7 @@ def test_case_column_value_length_to_be_between_col_space(): TestCaseParameterValue(name="minLength", value="1"), TestCaseParameterValue(name="maxLength", value="10"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -173,6 +175,7 @@ def test_case_column_value_length_to_be_between_no_min(): parameterValues=[ TestCaseParameterValue(name="maxLength", value="10"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -318,6 +321,7 @@ def test_case_column_value_in_set(): parameterValues=[ TestCaseParameterValue(name="allowedValues", value="['John']"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -361,6 +365,7 @@ def test_case_column_values_not_in_set(): parameterValues=[ TestCaseParameterValue(name="forbiddenValues", value="['John']"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -391,6 +396,7 @@ def test_case_column_values_to_be_between(): TestCaseParameterValue(name="minValue", value="29"), TestCaseParameterValue(name="maxValue", value="33"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -402,6 +408,7 @@ def test_case_column_values_to_be_not_null(): entityLink=ENTITY_LINK_NICKNAME, testSuite=EntityReference(id=uuid4(), type="TestSuite"), # type: ignore testDefinition=EntityReference(id=uuid4(), type="TestDefinition"), # type: ignore + computePassedFailedRowCount=True, ) # type: ignore @@ -413,6 +420,7 @@ def test_case_column_values_to_be_unique(): entityLink=ENTITY_LINK_NICKNAME, testSuite=EntityReference(id=uuid4(), type="TestSuite"), # type: ignore testDefinition=EntityReference(id=uuid4(), type="TestDefinition"), # type: ignore + computePassedFailedRowCount=True, ) # type: ignore @@ -427,6 +435,7 @@ def test_case_column_values_to_match_regex(): parameterValues=[ TestCaseParameterValue(name="regex", value="J.*"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -441,6 +450,7 @@ def test_case_column_values_to_not_match_regex(): parameterValues=[ TestCaseParameterValue(name="forbiddenRegex", value="X%"), ], + computePassedFailedRowCount=True, ) # type: ignore @@ -680,6 +690,7 @@ def test_case_column_values_to_be_between_date(): TestCaseParameterValue(name="minValue", value="1625127852000"), TestCaseParameterValue(name="maxValue", value="1625127852000"), ], + computePassedFailedRowCount=True, ) # type: ignore diff --git a/ingestion/tests/unit/test_suite/test_validations_databases.py 
b/ingestion/tests/unit/test_suite/test_validations_databases.py index cd631b54875..9d779434c71 100644 --- a/ingestion/tests/unit/test_suite/test_validations_databases.py +++ b/ingestion/tests/unit/test_suite/test_validations_databases.py @@ -39,6 +39,10 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d") "2021-07-01 00:00:00", "2021-07-01 23:59:59.999999", TestCaseStatus.Failed, + 0.0, + 30.0, + 0.0, + 100.0, ), ), ( @@ -50,157 +54,269 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d") "2021-07-01 10:37:59", "2021-07-01 10:37:59", TestCaseStatus.Success, + None, + None, + None, + None, ), ), ( "test_case_column_value_length_to_be_between", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, "8", "14", TestCaseStatus.Failed), + ( + TestCaseResult, + "8", + "14", + TestCaseStatus.Failed, + 20.0, + 10.0, + 66.67, + 33.33, + ), ), ( "test_case_column_value_length_to_be_between_col_space", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, "2", "3", TestCaseStatus.Success), + (TestCaseResult, "2", "3", TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0), ), ( "test_case_column_value_length_to_be_between_no_min", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + (TestCaseResult, None, None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0), ), ( "test_case_column_value_max_to_be_between", "columnValueMaxToBeBetween", "COLUMN", - (TestCaseResult, "31", None, TestCaseStatus.Failed), + (TestCaseResult, "31", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_value_max_to_be_between_no_min", "columnValueMaxToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Failed), + (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_value_mean_to_be_between", "columnValueMeanToBeBetween", "COLUMN", - (TestCaseResult, "30.5", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "30.5", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_mean_to_be_between_no_max", "columnValueMeanToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_median_to_be_between", "columnValueMedianToBeBetween", "COLUMN", - (TestCaseResult, "30", None, TestCaseStatus.Failed), + (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_value_min_to_be_between", "columnValueMinToBeBetween", "COLUMN", - (TestCaseResult, "30", None, TestCaseStatus.Success), + ( + TestCaseResult, + "30", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_min_to_be_between_no_min", "columnValueMinToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_stddev_to_be_between", "columnValueStdDevToBeBetween", "COLUMN", - (TestCaseResult, "0.25", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "0.25", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_stddev_to_be_between_no_min", "columnValueStdDevToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( 
"test_case_column_value_in_set", "columnValuesToBeInSet", "COLUMN", - (TestCaseResult, "20", None, TestCaseStatus.Success), + ( + TestCaseResult, + "20", + None, + TestCaseStatus.Success, + 20.0, + 10.0, + 66.67, + 33.33, + ), ), ( "test_case_column_values_missing_count_to_be_equal", "columnValuesMissingCount", "COLUMN", - (TestCaseResult, "10", None, TestCaseStatus.Success), + ( + TestCaseResult, + "10", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_values_missing_count_to_be_equal_missing_values", "columnValuesMissingCount", "COLUMN", - (TestCaseResult, "20", None, TestCaseStatus.Failed), + (TestCaseResult, "20", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_values_not_in_set", "columnValuesToBeNotInSet", "COLUMN", - (TestCaseResult, "20", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "20", + None, + TestCaseStatus.Failed, + 10.0, + 20.0, + 33.33, + 66.67, + ), ), ( "test_case_column_sum_to_be_between", "columnValuesSumToBeBetween", "COLUMN", - (TestCaseResult, "610", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "610", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_values_to_be_between", "columnValuesToBeBetween", "COLUMN", - (TestCaseResult, "30", None, TestCaseStatus.Success), + (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0), ), ( "test_case_column_values_to_be_not_null", "columnValuesToBeNotNull", "COLUMN", - (TestCaseResult, "10", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "10", + None, + TestCaseStatus.Failed, + 20.0, + 10.0, + 66.67, + 33.33, + ), ), ( "test_case_column_values_to_be_unique", "columnValuesToBeUnique", "COLUMN", - (TestCaseResult, "20", "0", TestCaseStatus.Failed), + (TestCaseResult, "20", "0", TestCaseStatus.Failed, 0.0, 20.0, 0.0, 100.0), ), ( "test_case_column_values_to_match_regex", "columnValuesToMatchRegex", "COLUMN", - (TestCaseResult, "30", None, TestCaseStatus.Success), + (TestCaseResult, "30", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0), ), ( "test_case_column_values_to_not_match_regex", "columnValuesToNotMatchRegex", "COLUMN", - (TestCaseResult, "0", None, TestCaseStatus.Success), + (TestCaseResult, "0", None, TestCaseStatus.Success, 30.0, 0.0, 100.0, 0.0), ), ( "test_case_table_column_count_to_be_between", "tableColumnCountToBeBetween", "TABLE", - (TestCaseResult, "7", None, TestCaseStatus.Success), + (TestCaseResult, "7", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_table_column_count_to_equal", "tableColumnCountToEqual", "TABLE", - (TestCaseResult, "7", None, TestCaseStatus.Failed), + (TestCaseResult, "7", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_column_name_to_exist", "tableColumnNameToExist", "TABLE", - (TestCaseResult, "1", None, TestCaseStatus.Success), + (TestCaseResult, "1", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_column_to_match_set", @@ -211,55 +327,86 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d") "0", None, TestCaseStatus.Failed, + None, + None, + None, + None, ), ), ( "test_case_column_to_match_set_ordered", "tableColumnToMatchSet", "TABLE", - (TestCaseResult, None, None, TestCaseStatus.Failed), + (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_custom_sql_query", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, "20", None, TestCaseStatus.Failed), + (TestCaseResult, "20", None, 
TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_custom_sql_query_success", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, "0", None, TestCaseStatus.Success), + (TestCaseResult, "0", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_table_row_count_to_be_between", "tableRowCountToBeBetween", "TABLE", - (TestCaseResult, "30", None, TestCaseStatus.Success), + ( + TestCaseResult, + "30", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_table_row_count_to_be_equal", "tableRowCountToEqual", "TABLE", - (TestCaseResult, "30", None, TestCaseStatus.Failed), + (TestCaseResult, "30", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_row_inserted_count_to_be_between", "tableRowInsertedCountToBeBetween", "TABLE", - (TestCaseResult, "6", None, TestCaseStatus.Success), + (TestCaseResult, "6", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_table_custom_sql_query_with_threshold_success", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, "10", None, TestCaseStatus.Success), + ( + TestCaseResult, + "10", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_table_custom_sql_unsafe_query_aborted", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, None, None, TestCaseStatus.Aborted), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Aborted, + None, + None, + None, + None, + ), ), ], ) @@ -273,7 +420,16 @@ def test_suite_validation_database( ): """Generic test runner for test validations""" test_case = request.getfixturevalue(test_case_name) - type_, val_1, val_2, status = expected + ( + type_, + val_1, + val_2, + status, + passed_rows, + failed_rows, + passed_percentage, + failed_percentage, + ) = expected if test_case_name == "test_case_column_values_to_be_between_date": with patch( @@ -334,4 +490,12 @@ def test_suite_validation_database( assert res.testResultValue[0].value == val_1 if val_2: assert res.testResultValue[1].value == val_2 + if passed_rows: + assert res.passedRows == passed_rows + if failed_rows: + assert res.failedRows == failed_rows + if passed_percentage: + assert round(res.passedRowsPercentage, 2) == passed_percentage + if failed_percentage: + assert round(res.failedRowsPercentage, 2) == failed_percentage assert res.testCaseStatus == status diff --git a/ingestion/tests/unit/test_suite/test_validations_datalake.py b/ingestion/tests/unit/test_suite/test_validations_datalake.py index 1b48123efb7..da0e843810f 100644 --- a/ingestion/tests/unit/test_suite/test_validations_datalake.py +++ b/ingestion/tests/unit/test_suite/test_validations_datalake.py @@ -70,198 +70,409 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame( "test_case_column_value_length_to_be_between", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, "8", "14", TestCaseStatus.Failed), + ( + TestCaseResult, + "8", + "14", + TestCaseStatus.Failed, + 4000.0, + 2000.0, + 66.67, + 33.33, + ), ), ( "test_case_column_value_length_to_be_between_col_space", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, "2", "3", TestCaseStatus.Success), + (TestCaseResult, "2", "3", TestCaseStatus.Success, 6000.0, 0.0, 100.0, 0.0), ), ( "test_case_column_value_length_to_be_between_no_min", "columnValueLengthsToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + 6000.0, + 0.0, + 100.0, + 0.0, + ), ), ( "test_case_column_value_max_to_be_between", 
"columnValueMaxToBeBetween", "COLUMN", - (TestCaseResult, "31.0", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "31.0", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_max_to_be_between_no_min", "columnValueMaxToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Failed), + (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_value_mean_to_be_between", "columnValueMeanToBeBetween", "COLUMN", - (TestCaseResult, "30.5", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "30.5", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_mean_to_be_between_no_max", "columnValueMeanToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_median_to_be_between", "columnValueMedianToBeBetween", "COLUMN", - (TestCaseResult, "30.5", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "30.5", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_min_to_be_between", "columnValueMinToBeBetween", "COLUMN", - (TestCaseResult, "30.0", None, TestCaseStatus.Success), + ( + TestCaseResult, + "30.0", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_min_to_be_between_no_min", "columnValueMinToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_stddev_to_be_between", "columnValueStdDevToBeBetween", "COLUMN", - (TestCaseResult, "0.500062511721192", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "0.500062511721192", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_value_stddev_to_be_between_no_min", "columnValueStdDevToBeBetween", "COLUMN", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_column_value_in_set", "columnValuesToBeInSet", "COLUMN", - (TestCaseResult, "4000", None, TestCaseStatus.Success), + ( + TestCaseResult, + "4000", + None, + TestCaseStatus.Success, + 4000.0, + 2000.0, + 66.67, + 33.33, + ), ), ( "test_case_column_values_missing_count_to_be_equal", "columnValuesMissingCount", "COLUMN", - (TestCaseResult, "2000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "2000", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_values_missing_count_to_be_equal_missing_values", "columnValuesMissingCount", "COLUMN", - (TestCaseResult, "4000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "4000", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_values_not_in_set", "columnValuesToBeNotInSet", "COLUMN", - (TestCaseResult, "4000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "4000", + None, + TestCaseStatus.Failed, + 2000.0, + 4000.0, + 33.33, + 66.67, + ), ), ( "test_case_column_sum_to_be_between", "columnValuesSumToBeBetween", "COLUMN", - (TestCaseResult, "122000.0", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "122000.0", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_column_values_to_be_between", "columnValuesToBeBetween", 
"COLUMN", - (TestCaseResult, "30.0", None, TestCaseStatus.Success), + ( + TestCaseResult, + "30.0", + None, + TestCaseStatus.Success, + 6000.0, + 0.0, + 100.0, + 0.0, + ), ), ( "test_case_column_values_to_be_not_null", "columnValuesToBeNotNull", "COLUMN", - (TestCaseResult, "2000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "2000", + None, + TestCaseStatus.Failed, + 4000.0, + 2000.0, + 66.67, + 33.33, + ), ), ( "test_case_column_values_to_be_unique", "columnValuesToBeUnique", "COLUMN", - (TestCaseResult, "4000", "0", TestCaseStatus.Failed), + ( + TestCaseResult, + "4000", + "0", + TestCaseStatus.Failed, + 0.0, + 4000.0, + 0.0, + 100.0, + ), ), ( "test_case_column_values_to_match_regex", "columnValuesToMatchRegex", "COLUMN", - (TestCaseResult, "6000", None, TestCaseStatus.Success), + ( + TestCaseResult, + "6000", + None, + TestCaseStatus.Success, + 6000.0, + 0.0, + 100.0, + 0.0, + ), ), ( "test_case_column_values_to_not_match_regex", "columnValuesToNotMatchRegex", "COLUMN", - (TestCaseResult, "0", None, TestCaseStatus.Success), + ( + TestCaseResult, + "0", + None, + TestCaseStatus.Success, + 6000.0, + 0.0, + 100.0, + 0.0, + ), ), ( "test_case_table_column_count_to_be_between", "tableColumnCountToBeBetween", "TABLE", - (TestCaseResult, "7", None, TestCaseStatus.Success), + (TestCaseResult, "7", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_table_column_count_to_equal", "tableColumnCountToEqual", "TABLE", - (TestCaseResult, "7", None, TestCaseStatus.Failed), + (TestCaseResult, "7", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_column_name_to_exist", "tableColumnNameToExist", "TABLE", - (TestCaseResult, "1", None, TestCaseStatus.Success), + (TestCaseResult, "1", None, TestCaseStatus.Success, None, None, None, None), ), ( "test_case_column_to_match_set", "tableColumnToMatchSet", "TABLE", - ( - TestCaseResult, - "0", - None, - TestCaseStatus.Failed, - ), + (TestCaseResult, "0", None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_column_to_match_set_ordered", "tableColumnToMatchSet", "TABLE", - (TestCaseResult, None, None, TestCaseStatus.Failed), + (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_custom_sql_query_failed_dl", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, None, None, TestCaseStatus.Failed), + (TestCaseResult, None, None, TestCaseStatus.Failed, None, None, None, None), ), ( "test_case_table_custom_sql_query_success_dl", "tableCustomSQLQuery", "TABLE", - (TestCaseResult, None, None, TestCaseStatus.Success), + ( + TestCaseResult, + None, + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ( "test_case_table_row_count_to_be_between", "tableRowCountToBeBetween", "TABLE", - (TestCaseResult, "6000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "6000", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_table_row_count_to_be_equal", "tableRowCountToEqual", "TABLE", - (TestCaseResult, "6000", None, TestCaseStatus.Failed), + ( + TestCaseResult, + "6000", + None, + TestCaseStatus.Failed, + None, + None, + None, + None, + ), ), ( "test_case_table_row_inserted_count_to_be_between", "tableRowInsertedCountToBeBetween", "TABLE", - (TestCaseResult, "2000", None, TestCaseStatus.Success), + ( + TestCaseResult, + "2000", + None, + TestCaseStatus.Success, + None, + None, + None, + None, + ), ), ], ) @@ -275,7 +486,16 @@ def test_suite_validation_datalake( """Generic test runner for test validations""" 
test_case = request.getfixturevalue(test_case_name) - type_, val_1, val_2, status = expected + ( + type_, + val_1, + val_2, + status, + passed_rows, + failed_rows, + passed_percentage, + failed_percentage, + ) = expected test_handler_obj = import_test_case_class( test_type, @@ -297,5 +517,12 @@ def test_suite_validation_datalake( assert res.testResultValue[0].value == val_1 if val_2: assert res.testResultValue[1].value == val_2 - + if passed_rows: + assert res.passedRows == passed_rows + if failed_rows: + assert res.failedRows == failed_rows + if passed_percentage: + assert round(res.passedRowsPercentage, 2) == passed_percentage + if failed_percentage: + assert round(res.failedRowsPercentage, 2) == failed_percentage assert res.testCaseStatus == status diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResolutionStatusResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResolutionStatusResource.java index c8bc4147700..fd23fd90a1a 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResolutionStatusResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResolutionStatusResource.java @@ -1,7 +1,6 @@ package org.openmetadata.service.resources.dqtests; import io.swagger.v3.oas.annotations.ExternalDocumentation; -import io.swagger.v3.oas.annotations.Hidden; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; import io.swagger.v3.oas.annotations.media.Content; @@ -55,9 +54,8 @@ import org.openmetadata.service.util.ResultList; @Slf4j @Path("/v1/dataQuality/testCases/testCaseIncidentStatus") @Tag( - name = "Test Case Failure Status", - description = "APIs to test case failure status from resolution center.") -@Hidden + name = "Test Case Incident Manager", + description = "APIs to test case incident status from incident manager.") @Produces(MediaType.APPLICATION_JSON) @Consumes(MediaType.APPLICATION_JSON) @Collection(name = "TestCases") diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesLengthsToBeBetween.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesLengthsToBeBetween.json index 3389d009e9f..39d0d26a988 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesLengthsToBeBetween.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesLengthsToBeBetween.json @@ -19,5 +19,6 @@ "description": "The {maxLength} for the column value. if maxLength is not included, minLength is treated as lowerBound and there will be no maximum value length", "dataType": "INT" } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeBetween.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeBetween.json index 0cd0c5cb8f2..e8b74744b1a 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeBetween.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeBetween.json @@ -19,5 +19,6 @@ "description": "The {maxValue} value for the column entry. 
if maxValue is not included, minValue is treated as lowerBound and there will be no maximum", "dataType": "INT" } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeInSet.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeInSet.json index 73bd7d5515d..af09aceae59 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeInSet.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeInSet.json @@ -14,5 +14,6 @@ "dataType": "ARRAY", "required": true } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotInSet.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotInSet.json index b239c7b9fe8..c0eae217ef5 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotInSet.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotInSet.json @@ -14,5 +14,6 @@ "dataType": "ARRAY", "required": true } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotNull.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotNull.json index 30b468e537c..750f50ecfb5 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotNull.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeNotNull.json @@ -5,5 +5,6 @@ "description": "This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. ", "entityType": "COLUMN", "testPlatforms": ["OpenMetadata"], - "supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"] + "supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeUnique.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeUnique.json index cffef86e161..5a890b4e425 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeUnique.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToBeUnique.json @@ -5,5 +5,6 @@ "description": "This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. 
", "entityType": "COLUMN", "testPlatforms": ["OpenMetadata"], - "supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"] + "supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToMatchRegex.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToMatchRegex.json index 0844c606d8f..4a9b1f5a7d0 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToMatchRegex.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToMatchRegex.json @@ -14,5 +14,6 @@ "dataType": "STRING", "required": true } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToNotMatchRegex.json b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToNotMatchRegex.json index 81536370f7f..a03b1cd2f10 100644 --- a/openmetadata-service/src/main/resources/json/data/tests/columnValuesToNotMatchRegex.json +++ b/openmetadata-service/src/main/resources/json/data/tests/columnValuesToNotMatchRegex.json @@ -14,5 +14,6 @@ "dataType": "STRING", "required": true } - ] + ], + "supportsRowLevelPassedFailed": true } diff --git a/openmetadata-spec/src/main/resources/json/schema/api/tests/createTestCase.json b/openmetadata-spec/src/main/resources/json/schema/api/tests/createTestCase.json index 5ea8647ccd7..b75886870ab 100644 --- a/openmetadata-spec/src/main/resources/json/schema/api/tests/createTestCase.json +++ b/openmetadata-spec/src/main/resources/json/schema/api/tests/createTestCase.json @@ -39,6 +39,11 @@ "owner": { "description": "Owner of this test", "$ref": "../../type/entityReference.json" + }, + "computePassedFailedRowCount": { + "description": "Compute the passed and failed row count for the test case.", + "type": "boolean", + "default": false } }, "required": ["name", "testDefinition", "entityLink", "testSuite"], diff --git a/openmetadata-spec/src/main/resources/json/schema/tests/basic.json b/openmetadata-spec/src/main/resources/json/schema/tests/basic.json index 7832249cc37..2112e9d3e8a 100644 --- a/openmetadata-spec/src/main/resources/json/schema/tests/basic.json +++ b/openmetadata-spec/src/main/resources/json/schema/tests/basic.json @@ -89,6 +89,22 @@ "testCaseResolutionStatusReference": { "description": "Reference to the failure status object for the test case result.", "$ref": "./testCaseResolutionStatus.json" + }, + "passedRows": { + "description": "Number of rows that passed.", + "type": "integer" + }, + "failedRows": { + "description": "Number of rows that failed.", + "type": "integer" + }, + "passedRowsPercentage": { + "description": "Percentage of rows that passed.", + "type": "number" + }, + "failedRowsPercentage": { + "description": "Percentage of rows that 
failed.", + "type": "number" } } }, diff --git a/openmetadata-spec/src/main/resources/json/schema/tests/testCase.json b/openmetadata-spec/src/main/resources/json/schema/tests/testCase.json index d419b55b14a..60bd78c6247 100644 --- a/openmetadata-spec/src/main/resources/json/schema/tests/testCase.json +++ b/openmetadata-spec/src/main/resources/json/schema/tests/testCase.json @@ -100,6 +100,11 @@ "description": "When `true` indicates the entity has been soft deleted.", "type": "boolean", "default": false + }, + "computePassedFailedRowCount": { + "description": "Compute the passed and failed row count for the test case.", + "type": "boolean", + "default": false } }, "required": ["name", "testDefinition", "entityLink", "testSuite"], diff --git a/openmetadata-spec/src/main/resources/json/schema/tests/testDefinition.json b/openmetadata-spec/src/main/resources/json/schema/tests/testDefinition.json index da663f0a959..9ac82c7aa7f 100644 --- a/openmetadata-spec/src/main/resources/json/schema/tests/testDefinition.json +++ b/openmetadata-spec/src/main/resources/json/schema/tests/testDefinition.json @@ -155,6 +155,11 @@ "description": "When `true` indicates the entity has been soft deleted.", "type": "boolean", "default": false + }, + "supportsRowLevelPassedFailed": { + "description": "When `true` indicates the test case supports row level passed/failed.", + "type": "boolean", + "default": false } }, "required": ["name", "description", "testPlatforms"],