Fixes #7118 by cleaning up test names (#7494)

* Cleaned up tests names and add registry name tests

* Updated documentation for test types supported by OM
This commit is contained in:
Teddy 2022-09-16 07:04:56 +02:00 committed by GitHub
parent 8e00a6eb8a
commit 1ba6e284fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 706 additions and 254 deletions

View File

@ -95,14 +95,14 @@ logger = profiler_logger()
validation_enum_registry = enum_register()
# Table Tests
validation_enum_registry.add("TableRowCountToEqual")(table_row_count_to_equal)
validation_enum_registry.add("TableRowCountToBeBetween")(table_row_count_to_be_between)
validation_enum_registry.add("TableColumnCountToEqual")(table_column_count_to_equal)
validation_enum_registry.add("TableColumnCountToBeBetween")(
validation_enum_registry.add("tableRowCountToEqual")(table_row_count_to_equal)
validation_enum_registry.add("tableRowCountToBeBetween")(table_row_count_to_be_between)
validation_enum_registry.add("tableColumnCountToEqual")(table_column_count_to_equal)
validation_enum_registry.add("tableColumnCountToBeBetween")(
table_column_count_to_be_between
)
validation_enum_registry.add("TableColumnToMatchSet")(table_column_to_match_set)
validation_enum_registry.add("TableColumnNameToExist")(table_column_name_to_exist)
validation_enum_registry.add("tableColumnToMatchSet")(table_column_to_match_set)
validation_enum_registry.add("tableColumnNameToExist")(table_column_name_to_exist)
validation_enum_registry.add("tableCustomSQLQuery")(table_custom_sql_query)
# # Column Tests
@ -133,7 +133,7 @@ validation_enum_registry.add("columnValueStdDevToBeBetween")(
# # Column Session Tests
validation_enum_registry.add("columnValuesToBeNotInSet")(column_values_not_in_set)
validation_enum_registry.add("ColumnValuesToBeInSet")(column_values_in_set)
validation_enum_registry.add("columnValuesToBeInSet")(column_values_in_set)
validation_enum_registry.add("columnValuesToMatchRegex")(column_values_to_match_regex)
validation_enum_registry.add("columnValuesToNotMatchRegex")(
column_values_to_not_match_regex

View File

@ -0,0 +1,66 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Validate the names in the registry match the ones of the test definition
"""
from unittest import TestCase
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.tests.testDefinition import TestDefinition
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.test_suite.validations.core import validation_enum_registry
# Minimal TestSuite workflow configuration used by the test case in this
# module; the client is built from the "openMetadataServerConfig" entry.
test_suite_config = {
    "source": {
        "type": "TestSuite",
        "serviceName": "TestSuiteWorkflow",
        "sourceConfig": {
            "config": {"type": "TestSuite"},
        },
    },
    "processor": {"type": "orm-test-runner", "config": {}},
    "sink": {
        "type": "metadata-rest",
        "config": {},
    },
    "workflowConfig": {
        "openMetadataServerConfig": {"hostPort": "http://localhost:8585/api"},
    },
}
class TestRegistryNamesMatchTestDefinition(TestCase):
    """Check that every validation registry key is a known Test Definition name.

    The Test Definition names are listed through an ``OpenMetadata`` client
    built from the ``openMetadataServerConfig`` section of
    ``test_suite_config``.
    """

    # Client shared by the tests of this class.
    metadata = OpenMetadata(
        OpenMetadataConnection.parse_obj(
            test_suite_config["workflowConfig"]["openMetadataServerConfig"]
        )
    )

    def test_name_match(self):
        """Every name in the registry must be the name of a Test Definition."""
        test_definition_names = {
            entity.name.__root__
            for entity in self.metadata.list_all_entities(
                entity=TestDefinition, params={"limit": "100"}
            )
        }
        registry_names = set(validation_enum_registry.registry.keys())

        # Compute the offending names explicitly so a failure reports which
        # registry keys are unknown; a unittest assertion (unlike a bare
        # `assert`) is also not stripped when Python runs with -O.
        unknown_names = registry_names - test_definition_names
        self.assertFalse(
            unknown_names,
            "Registry names without a matching Test Definition: "
            f"{sorted(unknown_names)}",
        )

View File

@ -270,7 +270,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["ColumnValuesToBeInSet"](
res = validation_enum_registry.registry["columnValuesToBeInSet"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -484,7 +484,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableColumnCountToBeBetween"](
res = validation_enum_registry.registry["tableColumnCountToBeBetween"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -504,7 +504,7 @@ class testSuiteValidation(unittest.TestCase):
parameterValues=[TestCaseParameterValue(name="columnCount", value="6")],
)
res = validation_enum_registry.registry["TableColumnCountToEqual"](
res = validation_enum_registry.registry["tableColumnCountToEqual"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -524,7 +524,7 @@ class testSuiteValidation(unittest.TestCase):
parameterValues=[TestCaseParameterValue(name="columnName", value="id")],
)
res = validation_enum_registry.registry["TableColumnNameToExist"](
res = validation_enum_registry.registry["tableColumnNameToExist"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -546,7 +546,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableColumnToMatchSet"](
res = validation_enum_registry.registry["tableColumnToMatchSet"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -571,7 +571,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableColumnToMatchSet"](
res = validation_enum_registry.registry["tableColumnToMatchSet"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -592,7 +592,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableColumnToMatchSet"](
res = validation_enum_registry.registry["tableColumnToMatchSet"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -657,7 +657,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableRowCountToBeBetween"](
res = validation_enum_registry.registry["tableRowCountToBeBetween"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,
@ -679,7 +679,7 @@ class testSuiteValidation(unittest.TestCase):
],
)
res = validation_enum_registry.registry["TableRowCountToEqual"](
res = validation_enum_registry.registry["tableRowCountToEqual"](
test_case=test_case,
execution_date=EXECUTION_DATE.timestamp(),
runner=self.runner,

View File

@ -3,63 +3,56 @@ title: Tests
slug: /openmetadata/ingestion/workflows/data-quality/tests
---
# Tests
Here you can see all the supported tests and how to configure them.
# Test
Here you can see all the supported test definitions and how to configure them in the YAML config file.
A **Test Case** adds logic to the Metrics results. A Metric is neither good nor wrong, so we need the Test definitions to map results into Success or Failures.
A **Test Definition** is a generic definition of a test. This Test Definition then gets specified in a Test Case. This Test Case is where the parameter(s) of a Test Definition are specified.
In this section, you will learn what tests we currently support and how to configure them.
## UI Configuration
You can navigate in the UI to a Table Entity and **Add Tests** from there. The form will help you select the type of test, the column, and its configurations.
From the **Data Quality** Tab you can create both Table and Column Tests:
<Image
src={"/images/openmetadata/data-quality/tests/write-your-first-tests.png"}
alt="Write your first test"
caption="Write your first test"
/>
<Image
src={"/images/openmetadata/data-quality/tests/sample-form-to-create-test.png"}
alt="Sample form to create a column test"
caption="Sample form to create a column test"
/>
Directly from the **Profiler** tab, you can create a Column Test in the column of your choice:
<Image
src={"/images/openmetadata/data-quality/tests/create-test-from-profiler-tab.png"}
alt="Create a column test from the profiler tab"
caption="Create a column test from the profiler tab"
/>
If you'd rather configure the tests directly in the Workflow JSON, we'll show examples for each of them below.
In this section, you will learn what tests we currently support and how to configure them in the YAML/JSON config file.
## Table Tests
Tests applied on top of Table Metrics.
Tests applied on top of a Table. Here is the list of all table tests:
- [Table Row Count to Equal](#table-row-count-to-equal)
- [Table Row Count to be Between](#table-row-count-to-be-between)
- [Table Column Count to Equal](#table-column-count-to-equal)
- [Table Column Count to be Between](#table-column-count-to-be-between)
- [Table Column Name to Exist](#table-column-name-to-exist)
- [Table Column to Match Set](#table-column-to-match-set)
- [Table Custom SQL Test](#table-custom-sql-test)
### Table Row Count to Equal
Validate that the `rowCount` metric is equal to a given value.
Validate the total row count in the table is equal to the given value.
**Properties**:
* `value`: Expected number of rows.
**YAML Config**
```yaml
testDefinitionName: tableRowCountToEqual
parameterValues:
- name: value
value: 2
```
**JSON Config**
```json
"testCase": {
"config": {
"value": 100
},
"tableTestType": "tableRowCountToEqual"
{
"testDefinitionName": "tableRowCountToEqual",
"parameterValues": [
{
"name": "value",
"value": 2
}
]
}
```
### Table Row Count to be Between
Validate that the `rowCount` metric is within a given range of values.
Validate the total row count is within a given range of values.
**Properties**:
@ -68,15 +61,32 @@ Validate that the `rowCount` metric is within a given range of values.
Any of those two need to be informed.
**YAML Config**
```yaml
testDefinitionName: tableRowCountToBeBetween
parameterValues:
- name: minValue
value: 10
- name: maxValue
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minValue": 10,
"maxValue": 100
},
"tableTestType": "tableRowCountToBeBetween"
{
"testDefinitionName": "tableRowCountToBeBetween",
"parameterValues": [
{
"name": "minValue",
"value": 10
},
{
"name": "maxValue",
"value": 10
}
]
}
```
@ -87,34 +97,63 @@ Validate that the number of columns in a table is equal to a given value.
* `columnCount`: Expected number of columns.
**YAML Config**
```yaml
testDefinitionName: tableColumnCountToEqual
parameterValues:
- name: columnCount
value: 5
```
**JSON Config**
```json
"testCase": {
"config": {
"columnCount": 7
},
"tableTestType": "tableColumnCountToEqual"
{
"testDefinitionName": "tableColumnCountToEqual",
"parameterValues": [
{
"name": "columnCount",
"value": 5
}
]
}
```
### Table Column Count to be Between
Validate the number of colum in a table is between the given value
Validate the number of columns in a table is between the given values
**Properties**
* `minColValue`: lower bound
* `maxColValue`: upper bound
**YAML Config**
```yaml
testDefinitionName: tableColumnCountToBeBetween
parameterValues:
- name: minColValue
value: 5
- name: maxColValue
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minColValue": 1,
"maxColValue": 10
},
"tableTestType": "tableColumnCountToBeBetween"
{
"testDefinitionName": "tableColumnCountToBeBetween",
"parameterValues": [
{
"name": "minColValue",
"value": 5
},
{
"name": "maxColValue",
"value": 10
}
]
}
```
@ -125,55 +164,162 @@ Validate a column name is present in the table
* `columnName`: the name of the column to check for
**YAML Config**
```yaml
testDefinitionName: tableColumnNameToExist
parameterValues:
- name: columnName
value: order_id
```
**JSON Config**
```json
"testCase": {
"config": {
"columnName": 1
},
"tableTestType": "tableColumnNameToExist"
{
"testDefinitionName": "tableColumnNameToExist",
"parameterValues": [
{
"name": "columnName",
"value": "order_id"
}
]
}
```
### Table Column to Match Set
Validate a list of table column name matches an expected set of column
Validate that the list of table column names matches an expected set of columns
**Properties**
* `columnNames`: comma-separated string of column names
* `ordered`: whether the test should check for column ordering. Default to False
**YAML Config**
```yaml
testDefinitionName: tableColumnToMatchSet
parameterValues:
- name: columnNames
value: "col1, col2, col3"
- name: ordered
value: true
```
**JSON Config**
```json
"testCase": {
"config": {
"columnNames": "col1, col2, col3",
"ordered": true
},
"tableTestType": "tableColumnToMatchSet"
{
"testDefinitionName": "tableColumnToMatchSet",
"parameterValues": [
{
"name": "columnNames",
"value": "col1, col2, col3"
},
{
"name": "ordered",
"value": true
}
]
}
```
### Table Custom SQL Test
Write your own SQL test. The test will pass if either of the following conditions is met:
- The query result returns 0 rows
- The query expression `COUNT(<col>)` returns 0
**Properties**
* `sqlExpression`: SQL expression
**Example**
```sql
SELECT
COUNT(customer_tier)
FROM DUAL
WHERE customer_tier = 'GOLD' and lifetime_value < 10000;
```
```sql
SELECT
customer_id
FROM DUAL
WHERE lifetime_value < 0;
```
**YAML Config**
```yaml
testDefinitionName: tableCustomSQLQuery
parameterValues:
- name: sqlExpression
value: >
SELECT
COUNT(customer_tier)
FROM DUAL
WHERE customer_tier = 'GOLD' and lifetime_value < 10000;
```
**JSON Config**
```json
{
"testDefinitionName": "tableCustomSQLQuery",
"parameterValues": [
{
"name": "sqlExpression",
"value": "SELECT COUNT(customer_tier) FROM DUAL WHERE customer_tier = 'GOLD' and lifetime_value < 10000;\n"
}
]
}
```
## Column Tests
Tests applied on top of Column metrics.
Tests applied on top of Column metrics. Here is the list of all column tests:
- [Column Values to Be Unique](#column-values-to-be-unique)
- [Column Values to Be Not Null](#column-values-to-be-not-null)
- [Column Values to Match Regex](#column-values-to-match-regex)
- [Column Values to not Match Regex](#column-values-to-not-match-regex)
- [Column Values to Be in Set](#column-values-to-be-in-set)
- [Column Values to Be Not In Set](#column-values-to-be-not-in-set)
- [Column Values to Be Between](#column-values-to-be-between)
- [Column Values Missing Count to Be Equal](#column-values-missing-count-to-be-equal)
- [Column Values Lengths to Be Between](#column-values-lengths-to-be-between)
- [Column Value Max to Be Between](#column-value-max-to-be-between)
- [Column Value Min to Be Between](#column-value-min-to-be-between)
- [Column Value Mean to Be Between](#column-value-mean-to-be-between)
- [Column Value Median to Be Between](#column-value-median-to-be-between)
- [Column Values Sum to Be Between](#column-values-sum-to-be-between)
- [Column Values Standard Deviation to Be Between](#column-values-standard-deviation-to-be-between)
### Column Values to Be Unique
Makes sure that there are no duplicates in a given column.
Makes sure that there are no duplicate values in a given column.
**Properties**
* `columnValuesToBeUnique`: To be set as `true`. This is required for proper JSON parsing in the profiler module.
**YAML Config**
```yaml
testDefinitionName: columnValuesToBeUnique
parameterValues:
- name: columnValuesToBeUnique
value: true
```
**JSON Config**
```json
"testCase": {
"config": {
"columnValuesToBeUnique": true
},
"columnTestType": "columnValuesToBeUnique"
{
"testDefinitionName": "columnValuesToBeUnique",
"parameterValues": [
{
"name": "columnValuesToBeUnique",
"value": true
}
]
}
```
@ -184,14 +330,26 @@ Validates that there are no null values in the column.
* `columnValuesToBeNotNull`: To be set as `true`. This is required for proper JSON parsing in the profiler module.
**YAML Config**
```yaml
testDefinitionName: columnValuesToBeNotNull
parameterValues:
- name: columnValuesToBeNotNull
value: true
```
**JSON Config**
```json
"testCase": {
"config": {
"columnValuesToBeNotNull": true
},
"columnTestType": "columnValuesToBeNotNull"
{
"testDefinitionName": "columnValuesToBeNotNull",
"parameterValues": [
{
"name": "columnValuesToBeNotNull",
"value": true
}
]
}
```
@ -202,14 +360,26 @@ This test allows us to specify how many values in a column we expect that will m
* `regex`: SQL `LIKE` expression to match. E.g., `%something%`.
**YAML Config**
```yaml
testDefinitionName: columnValuesToMatchRegex
parameterValues:
- name: regex
value: "%something%"
```
**JSON Config**
```json
"testCase": {
"config": {
"regex": "%something%"
},
"columnTestType": "columnValuesToMatchRegex"
{
"testDefinitionName": "columnValuesToMatchRegex",
"parameterValues": [
{
"name": "regex",
"value": "%something%"
}
]
}
```
@ -220,14 +390,26 @@ This test allows us to specify values in a column we expect that will not match
* `forbiddenRegex`: SQL LIKE expression to match. E.g., `%something%`.
**YAML Config**
```yaml
testDefinitionName: columnValuesToNotMatchRegex
parameterValues:
- name: forbiddenRegex
value: "%something%"
```
**JSON Config**
```json
"testCase": {
"config": {
"forbiddenRegex": "%something%"
},
"columnTestType": "columnValuesToNotMatchRegex"
{
"testDefinitionName": "columnValuesToNotMatchRegex",
"parameterValues": [
{
"name": "forbiddenRegex",
"value": "%something%"
}
]
}
```
@ -238,14 +420,29 @@ Validate values form a set are present in a column.
* `allowedValues`: List of allowed strings or numbers.
**YAML Config**
```yaml
testDefinitionName: columnValuesToBeInSet
parameterValues:
- name: allowedValues
value: ["forbidden1", "forbidden2"]
```
**JSON Config**
```json
"testCase": {
"config": {
"allowedValues": ["forbidden1", "forbidden2"]
},
"columnTestType": "columnValuesToBeInSet"
{
"testDefinitionName": "columnValuesToBeInSet",
"parameterValues": [
{
"name": "allowedValues",
"value": [
"forbidden1",
"forbidden2"
]
}
]
}
```
@ -256,14 +453,29 @@ Validate that there are no values in a column in a set of forbidden values.
* `forbiddenValues`: List of forbidden strings or numbers.
**YAML Config**
```yaml
testDefinitionName: columnValuesToBeNotInSet
parameterValues:
- name: forbiddenValues
value: ["forbidden1", "forbidden2"]
```
**JSON Config**
```json
"testCase": {
"config": {
"forbiddenValues": ["forbidden1", "forbidden2"]
},
"columnTestType": "columnValuesToBeNotInSet"
{
"testDefinitionName": "columnValuesToBeNotInSet",
"parameterValues": [
{
"name": "forbiddenValues",
"value": [
"forbidden1",
"forbidden2"
]
}
]
}
```
@ -278,15 +490,29 @@ Validate that the values of a column are within a given range.
Any of those two need to be informed.
**YAML Config**
```yaml
testDefinitionName: columnValuesToBeBetween
parameterValues:
- name: minValue
value: 10
- name: maxValue
value: 100
```
**JSON Config**
```json
"testCase": {
"config": {
"minValue": 10,
"maxValue": 100
},
"columnTestType": "columnValuesToBeBetween"
{
"testDefinitionName": "columnValuesToBeBetween",
"parameterValues": [
{
"name": "minValue",
"value": 10
},
{
"name": "maxValue",
"value": 100
}
]
}
```
@ -298,15 +524,56 @@ Validates that the number of missing values matches a given number. Missing valu
* `missingCountValue`: The number of missing values needs to be equal to this. This field is mandatory.
* `missingValueMatch`: A list of strings to consider as missing values. Optional.
**YAML Config**
```yaml
testDefinitionName: columnValuesMissingCountToBeEqual
parameterValues:
- name: missingValueMatch
value: ["NA", "N/A"]
- name: missingCountValue
value: 100
```
**JSON Config**
```json
"testCase": {
"config": {
"missingCountValue": 100,
"missingValueMatch": ["NA", "N/A"]
},
"columnTestType": "columnValuesMissingCountToBeEqual"
{
"testDefinitionName": "columnValuesMissingCountToBeEqual",
"parameterValues": [
{
"name": "missingValueMatch",
"value": [
"NA",
"N/A"
]
},
{
"name": "missingCountValue",
"value": 100
}
]
}
```
**JSON Config**
```json
{
"testDefinitionName": "columnValuesMissingCountToBeEqual",
"parameterValues": [
{
"name": "missingValueMatch",
"value": [
"NA",
"N/A"
]
},
{
"name": "missingCountValue",
"value": 100
}
]
}
```
@ -321,15 +588,32 @@ Validates that the lengths of the strings in a column are within a given range.
Any of those two need to be informed.
**YAML Config**
```yaml
testDefinitionName: columnValueLengthsToBeBetween
parameterValues:
- name: minLength
value: 50
- name: maxLength
value: 100
```
**JSON Config**
```json
"testCase": {
"config": {
"minLength": 4,
"maxLength": 18
},
"columnTestType": "columnValueLengthsToBeBetween"
{
"testDefinitionName": "columnValueLengthsToBeBetween",
"parameterValues": [
{
"name": "minLength",
"value": 50
},
{
"name": "maxLength",
"value": 100
}
]
}
```
@ -342,15 +626,32 @@ Validate the maximum value of a column is between a specific range
* `minValueForMaxInCol`: lower bound
* `maxValueForMaxInCol`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValueMaxToBeBetween
parameterValues:
- name: minValueForMaxInCol
value: 50
- name: maxValueForMaxInCol
value: 100
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForMaxInCol": 10,
"maxValueForMaxInCol": 50
},
"columnTestType": "columnValueMaxToBeBetween"
{
"testDefinitionName": "columnValueMaxToBeBetween",
"parameterValues": [
{
"name": "minValueForMaxInCol",
"value": 50
},
{
"name": "maxValueForMaxInCol",
"value": 100
}
]
}
```
@ -363,15 +664,32 @@ Validate the minimum value of a column is between a specific range
* `minValueForMinInCol`: lower bound
* `maxValueForMinInCol`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValueMinToBeBetween
parameterValues:
- name: minValueForMinInCol
value: 10
- name: maxValueForMinInCol
value: 50
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForMinInCol": 10,
"maxValueForMinInCol": 50
},
"columnTestType": "columnValueMinToBeBetween"
{
"testDefinitionName": "columnValueMinToBeBetween",
"parameterValues": [
{
"name": "minValueForMinInCol",
"value": 10
},
{
"name": "maxValueForMinInCol",
"value": 50
}
]
}
```
@ -384,15 +702,32 @@ Validate the mean of a column is between a specific range
* `minValueForMeanInCol`: lower bound
* `maxValueForMeanInCol`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValueMeanToBeBetween
parameterValues:
- name: minValueForMeanInCol
value: 5
- name: maxValueForMeanInCol
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForMeanInCol": 10,
"maxValueForMeanInCol": 50
},
"columnTestType": "columnValueMeanToBeBetween"
{
"testDefinitionName": "columnValueMeanToBeBetween",
"parameterValues": [
{
"name": "minValueForMeanInCol",
"value": 5
},
{
"name": "maxValueForMeanInCol",
"value": 10
}
]
}
```
@ -405,15 +740,32 @@ Validate the median of a column is between a specific range
* `minValueForMedianInCol`: lower bound
* `maxValueForMedianInCol`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValueMedianToBeBetween
parameterValues:
- name: minValueForMedianInCol
value: 5
- name: maxValueForMedianInCol
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForMedianInCol": 10,
"maxValueForMedianInCol": 50
},
"columnTestType": "columnValueMedianToBeBetween"
{
"testDefinitionName": "columnValueMedianToBeBetween",
"parameterValues": [
{
"name": "minValueForMedianInCol",
"value": 5
},
{
"name": "maxValueForMedianInCol",
"value": 10
}
]
}
```
@ -426,15 +778,32 @@ Validate the sum of a column is between a specific range
* `minValueForColSum`: lower bound
* `maxValueForColSum`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValuesSumToBeBetween
parameterValues:
- name: minValueForColSum
value: 5
- name: maxValueForColSum
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForColSum": 10,
"maxValueForColSum": 50
},
"columnTestType": "columnValuesSumToBeBetween"
{
"testDefinitionName": "columnValuesSumToBeBetween",
"parameterValues": [
{
"name": "minValueForColSum",
"value": 5
},
{
"name": "maxValueForColSum",
"value": 10
}
]
}
```
@ -447,14 +816,31 @@ Validate the standard deviation of a column is between a specific range
* `minValueForStdDevInCol`: lower bound
* `maxValueForStdDevInCol`: upper bound
**YAML Config**
```yaml
testDefinitionName: columnValueStdDevToBeBetween
parameterValues:
- name: minValueForStdDevInCol
value: 5
- name: maxValueForStdDevInCol
value: 10
```
**JSON Config**
```json
"testCase": {
"config": {
"minValueForStdDevInCol": 10,
"maxValueForStdDevInCol": 50
},
"columnTestType": "columnValueStdDevToBeBetween"
{
"testDefinitionName": "columnValueStdDevToBeBetween",
"parameterValues": [
{
"name": "minValueForStdDevInCol",
"value": 5
},
{
"name": "maxValueForStdDevInCol",
"value": 10
}
]
}
```

View File

@ -1,19 +1,19 @@
{
"name": "columnValueMaxToBeBetween",
"displayName": "columnValueMaxToBeBetween",
"displayName": "Column Value Max. to be Between",
"description": "This schema defines the test ColumnValueMaxToBeBetween. Test the maximum value in a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForMaxInCol",
"displayName": "minValueForMaxInCol",
"displayName": "Min",
"description": "Expected maximum value in the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxValueForMaxInCol",
"displayName": "maxValueForMaxInCol",
"displayName": "Max",
"description": "Expected maximum value in the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValueMeanToBeBetween",
"displayName": "columnValueMeanToBeBetween",
"displayName": "Column Value Mean To Be Between",
"description": "This schema defines the test ColumnValueMeanToBeBetween. Test the mean value in a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForMeanInCol",
"displayName": "minValueForMeanInCol",
"displayName": "Min",
"description": "Expected mean value for the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxValueForMeanInCol",
"displayName": "maxValueForMeanInCol",
"displayName": "Max",
"description": "Expected mean value for the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValueMedianToBeBetween",
"displayName": "columnValueMedianToBeBetween",
"displayName": "Column Value Median To Be Between",
"description": "This schema defines the test ColumnValueMedianToBeBetween. Test the median value in a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForMedianInCol",
"displayName": "minValueForMedianInCol",
"displayName": "Min",
"description": "Expected median value for the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxColValue",
"displayName": "maxColValue",
"displayName": "Max",
"description": "Expected median value for the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValueMinToBeBetween",
"displayName": "ColumnValueMinToBeBetween",
"displayName": "Column Value Min. To Be Between",
"description": "This schema defines the test ColumnValueMinToBeBetween. Test the minimum value in a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForMinInCol",
"displayName": "minValueForMinInCol",
"displayName": "Min",
"description": "Expected minimum value in the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxValueForMinInCol",
"displayName": "maxValueForMinInCol",
"displayName": "Max",
"description": "Expected minimum value in the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValueStdDevToBeBetween",
"displayName": "columnValueStdDevToBeBetween",
"displayName": "Column Value Std Dev To Be Between",
"description": "This schema defines the test ColumnValueStdDevToBeBetween. Test the std. dev. value in a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForStdDevInCol",
"displayName": "minValueForStdDevInCol",
"displayName": "Min",
"description": "Expected std. dev value for the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxValueForStdDevInCol",
"displayName": "maxValueForStdDevInCol",
"displayName": "Max",
"description": "Expected std. dev value for the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValueLengthsToBeBetween",
"displayName": "columnValueLengthsToBeBetween",
"displayName": "Column Value Lengths To Be Between",
"description": "This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minLength",
"displayName": "minLength",
"displayName": "Min",
"description": "The {minLength} for the column length. If minLength is not included, maxLength is treated as upperBound and there will be no minimum number of rows",
"dataType": "INT"
},
{
"name": "maxLength",
"displayName": "maxLength",
"displayName": "Max",
"description": "The {maxLength} for the column length. If maxLength is not included, minLength is treated as lowerBound and there will be no maximum number of rows",
"dataType": "INT"
}

View File

@ -1,20 +1,20 @@
{
"name": "columnValuesMissingCount",
"displayName": "ColumnValuesMissingCount",
"displayName": "Column Values Missing Count",
"description": "This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "missingCountValue",
"displayName": "missingCountValue",
"displayName": "Missing Count",
"description": "Number of missing values to be equal to.",
"dataType": "INT",
"required": true
},
{
"name": "missingValueMatch",
"displayName": "missingValueMatch",
"displayName": "Missing Value to Match",
"description": "By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.",
"dataType": "STRING"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValuesSumToBeBetween",
"displayName": "columnValuesSumToBeBetween",
"displayName": "Column Values Sum To Be Between",
"description": "This schema defines the test ColumnValuesSumToBeBetween. Test the sum of the values of a col is within a range.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValueForColSum",
"displayName": "minValueForColSum",
"displayName": "Min",
"description": "Expected sum of values in the column to be greater or equal than",
"dataType": "INT"
},
{
"name": "maxValueForColSum",
"displayName": "maxValueForColSum",
"displayName": "Max",
"description": "Expected sum of values in the column to be lower or equal than",
"dataType": "INT"
}

View File

@ -1,19 +1,19 @@
{
"name": "columnValuesToBeBetween",
"displayName": "columnValuesToBeBetween",
"displayName": "Column Values To Be Between",
"description": "This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minValue",
"displayName": "minValue",
"displayName": "Min",
"description": "The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows",
"dataType": "INT"
},
{
"name": "maxValue",
"displayName": "maxValue",
"displayName": "Max",
"description": "The {maxValue} value for the column entry. if maxValue is not included, minValue is treated as lowerBound and there will eb no maximum number of rows",
"dataType": "INT"
}

View File

@ -1,13 +1,13 @@
{
"name": "ColumnValuesToBeInSet",
"displayName": "ColumnValuesToBeInSet",
"name": "columnValuesToBeInSet",
"displayName": "Column Values To Be In Set",
"description": "This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "allowedValues",
"displayName": "allowedValues",
"displayName": "Allowed Values",
"description": "An Array of values.",
"dataType": "ARRAY",
"required": true

View File

@ -1,13 +1,13 @@
{
"name": "columnValuesToBeNotInSet",
"displayName": "columnValuesToBeNotInSet",
"displayName": "Column Values To Be Not In Set",
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "forbiddenValues",
"displayName": "forbiddenValues",
"displayName": "Forbidden Values",
"description": "An Array of values.",
"dataType": "ARRAY",
"required": true

View File

@ -1,7 +1,7 @@
{
"name": "columnValuesToBeNotNull",
"displayName": "columnValuesToBeNotNull",
"displayName": "Column Values To Be Not Null",
"description": "This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"]
"testPlatforms": ["OpenMetadata"]
}

View File

@ -1,7 +1,7 @@
{
"name": "columnValuesToBeUnique",
"displayName": "columnValuesToBeUnique",
"displayName": "Column Values To Be Unique",
"description": "This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"]
"testPlatforms": ["OpenMetadata"]
}

View File

@ -1,13 +1,13 @@
{
"name": "columnValuesToMatchRegex",
"displayName": "columnValuesToMatchRegex",
"displayName": "Column Values To Match Regex Pattern",
"description": "This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "regex",
"displayName": "RegEx",
"displayName": "RegEx Pattern",
"description": "The regular expression the column entries should match.",
"dataType": "STRING",
"required": true

View File

@ -1,13 +1,13 @@
{
"name": "columnValuesToNotMatchRegex",
"displayName": "columnValuesToNotMatchRegex",
"displayName": "Column Values To Not Match Regex",
"description": "This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "forbiddenRegex",
"displayName": "forbiddenRegex",
"displayName": "RegEx Pattern",
"description": "The regular expression the column entries should not match.",
"dataType": "STRING",
"required": true

View File

@ -1,19 +1,19 @@
{
"name": "TableColumnCountToBeBetween",
"displayName": "TableColumnCountToBeBetween",
"name": "tableColumnCountToBeBetween",
"displayName": "Table Column Count To Be Between",
"description": "This schema defines the test TableColumnCountToBeBetween. Test the number of columns to be between min max value.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "minColValue",
"displayName": "minColValue",
"displayName": "Min",
"description": "Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column",
"dataType": "INT"
},
{
"name": "maxColValue",
"displayName": "maxColValue",
"displayName": "Max",
"description": "Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column",
"dataType": "INT"
}

View File

@ -1,13 +1,13 @@
{
"name": "TableColumnCountToEqual",
"displayName": "TableColumnCountToEqual",
"name": "tableColumnCountToEqual",
"displayName": "Table Column Count To Equal",
"description": "This test defines the test TableColumnCountToEqual. Test the number of columns equal to a value.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "columnCount",
"displayName": "ColumnCount",
"displayName": "Count",
"description": "Expected number of columns to equal to a {value}",
"dataType": "INT",
"required": true

View File

@ -1,13 +1,13 @@
{
"name": "TableColumnNameToExist",
"displayName": "TableColumnNameToExist",
"name": "tableColumnNameToExist",
"displayName": "Table Column Name To Exist",
"description": "This test defines the test TableColumnNameToExist. Test the table columns exists in the table.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "columnName",
"displayName": "ColumnName",
"displayName": "Column Name",
"description": "Expected column of the table to exist",
"dataType": "STRING",
"required": true

View File

@ -1,21 +1,21 @@
{
"name": "TableColumnToMatchSet",
"displayName": "TableColumnToMatchSet",
"name": "tableColumnToMatchSet",
"displayName": "Table Column Names To Match Set",
"description": "This test defines the test TableColumnToMatchSet. Test the table columns match a set of values. Unordered by default.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"parameterDefinition": [
{
"name": "columnNames",
"displayName": "ColumnNames",
"description": "Expected columns of the table to match the ones in {columnValuesSet}",
"displayName": "Column Names",
"description": "Expected columns names of the table to match the ones in {Column Names} -- should be a coma seperated string",
"dataType": "STRING",
"required": "true"
},
{
"name": "ordered",
"displayName": "Ordered",
"description": "Whether or not to considered the order of the list when performing the match",
"description": "Whether or not to considered the order of the list when performing the match check",
"dataType": "BOOLEAN"
}
]

View File

@ -1,13 +1,13 @@
{
"name": "tableCustomSQLQuery",
"displayName": "tableCustomSQLQuery",
"description": "This test defines the test TableCustomSQLQuery. Test if a custom SQL returns 1 or 0 row.",
"displayName": "Custom SQL Query",
"description": "Test if a custom SQL returns 0 row or `COUNT(<x>) == 0`",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"testPlatforms": ["OpenMetadata"],
"parameterDefinition": [
{
"name": "sqlExpression",
"displayName": "SQLExpression",
"displayName": "SQL Expression",
"description": "SQL expression to run against the table",
"dataType": "STRING",
"required": "true"

View File

@ -1,19 +1,19 @@
{
"name": "TableRowCountToBeBetween",
"displayName": "TableRowCountToBeBetween",
"name": "tableRowCountToBeBetween",
"displayName": "Table Row Count To Be Between",
"description": "This defines the test TableRowCountToBeBetween. Test the number of rows to between to two values.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"parameterDefinition": [
{
"name": "minValue",
"displayName": "minValue",
"displayName": "Min",
"description": "Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column",
"dataType": "INT"
},
{
"name": "maxValue",
"displayName": "maxValue",
"displayName": "Max",
"description": "Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column",
"dataType": "INT"
}

View File

@ -1,14 +1,14 @@
{
"name": "TableRowCountToEqual",
"displayName": "TableRowCountToEqual",
"name": "tableRowCountToEqual",
"displayName": "Table Row Count To Equal",
"description": "This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.",
"entityType": "TABLE",
"testPlatforms": ["OpenMetadata", "GreatExpectations"],
"parameterDefinition": [
{
"name": "value",
"displayName": "Value",
"description": "Expected number of rows {value}",
"displayName": "Count",
"description": "Expected number of rows to be equal to {Count}",
"dataType": "INT",
"required": true
}