fix: add lowcardinality support (#24921)

This commit is contained in:
Teddy 2025-12-20 14:03:17 +01:00 committed by GitHub
parent 8c855c6b41
commit 66f2cb4d9c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 137 additions and 8 deletions

View File

@ -0,0 +1,48 @@
-- Add LOWCARDINALITY to supportedDataTypes for test definitions that already support STRING.
--
-- One set-based UPDATE covers every affected test definition; to extend the
-- migration, add the definition name to the IN (...) list below.
--
-- Guards in the WHERE clause:
--   * the supportedDataTypes array must already contain "STRING";
--   * the array must not already contain "LOWCARDINALITY", so re-running the
--     script never appends the value a second time.
UPDATE test_definition
SET json = JSON_ARRAY_APPEND(json, '$.supportedDataTypes', 'LOWCARDINALITY')
WHERE name IN (
    'columnValueLengthsToBeBetween',
    'columnValuesMissingCount',
    'columnValuesToBeInSet',
    'columnValuesToBeNotInSet',
    'columnValuesToBeNotNull',
    'columnValuesToBeUnique',
    'columnValuesToMatchRegex',
    'columnValuesToNotMatchRegex'
)
AND JSON_CONTAINS(JSON_EXTRACT(json, '$.supportedDataTypes'), '"STRING"')
AND NOT JSON_CONTAINS(JSON_EXTRACT(json, '$.supportedDataTypes'), '"LOWCARDINALITY"');

View File

@ -0,0 +1,80 @@
-- Add LOWCARDINALITY to supportedDataTypes for test definitions that already support STRING.
--
-- One set-based UPDATE covers every affected test definition; to extend the
-- migration, add the definition name to the IN (...) list below.
--
-- Guards in the WHERE clause:
--   * the supportedDataTypes array must already contain "STRING";
--   * the array must not already contain "LOWCARDINALITY", so re-running the
--     script never appends the value a second time.
--
-- The extracted array is parenthesised and cast to jsonb in the predicates
-- exactly as in the SET clause, so the containment operator (@>) does not
-- depend on operator associativity or on an implicit column type.
UPDATE test_definition
SET json = jsonb_set(
    json::jsonb,
    '{supportedDataTypes}',
    (json->'supportedDataTypes')::jsonb || '["LOWCARDINALITY"]'::jsonb
)
WHERE name IN (
    'columnValueLengthsToBeBetween',
    'columnValuesMissingCount',
    'columnValuesToBeInSet',
    'columnValuesToBeNotInSet',
    'columnValuesToBeNotNull',
    'columnValuesToBeUnique',
    'columnValuesToMatchRegex',
    'columnValuesToNotMatchRegex'
)
AND (json->'supportedDataTypes')::jsonb @> '"STRING"'::jsonb
AND NOT ((json->'supportedDataTypes')::jsonb @> '"LOWCARDINALITY"'::jsonb);

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "ARRAY"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "ARRAY", "LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "minLength",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"],
"supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON","LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "missingCountValue",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN", "LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "allowedValues",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN", "LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "forbiddenValues",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON"],
"supportedDataTypes": ["NUMBER","TINYINT","SMALLINT","INT","BIGINT","BYTEINT","BYTES","FLOAT","DOUBLE","DECIMAL","NUMERIC","TIMESTAMP","TIMESTAMPZ","TIME","DATE","DATETIME","INTERVAL","STRING","MEDIUMTEXT","TEXT","CHAR","VARCHAR","BOOLEAN","BINARY","VARBINARY","ARRAY","BLOB","LONGBLOB","MEDIUMBLOB","MAP","STRUCT","UNION","SET","GEOGRAPHY","ENUM","JSON","UUID","VARIANT","GEOMETRY","POINT","POLYGON","LOWCARDINALITY"],
"supportsRowLevelPassedFailed": true,
"provider": "system",
"dataQualityDimension": "Completeness"

View File

@ -46,7 +46,8 @@
"VARIANT",
"GEOMETRY",
"POINT",
"POLYGON"
"POLYGON",
"LOWCARDINALITY"
],
"supportsRowLevelPassedFailed": true,
"provider": "system",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "regex",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
"supportedDataTypes": ["BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "LOWCARDINALITY"],
"parameterDefinition": [
{
"name": "forbiddenRegex",