FIX #22099: enable 'Column values to be in set' test case for boolean columns (#22491)

* fix(dq): enable 'Column values to be in set' test case for boolean columns

Add BOOLEAN to supportedDataTypes array in columnValuesToBeInSet.json
to allow boolean column validation with predefined allowed values.

This enables users to enforce strict true/false validation on boolean
columns directly at the column level, resolving issue #22099.

Co-authored-by: IceS2 <IceS2@users.noreply.github.com>

* Add tests for the new feature

* Add migrations and extend BOOLEAN support to columnValuesToBeNotInSet

---------

Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com>
Co-authored-by: IceS2 <IceS2@users.noreply.github.com>
This commit is contained in:
IceS2 2025-07-25 15:17:38 +02:00 committed by GitHub
parent 1e8e38f2ca
commit bad772db39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 61 additions and 6 deletions

View File

@ -137,6 +137,10 @@ CREATE TABLE IF NOT EXISTS entity_deletion_lock (
INDEX idx_deletion_lock_time (lockedAt)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
-- Allow BOOLEAN columns for the columnValuesToBeInSet and columnValuesToBeNotInSet
-- test definitions by overwriting their supportedDataTypes array with the full list.
-- Only $.supportedDataTypes is written; no other key of the JSON document is modified.
UPDATE test_definition
SET json = JSON_SET(
    json,
    '$.supportedDataTypes',
    JSON_ARRAY(
        'NUMBER', 'INT', 'FLOAT', 'DOUBLE', 'DECIMAL',
        'TINYINT', 'SMALLINT', 'BIGINT', 'BYTEINT', 'BYTES',
        'STRING', 'MEDIUMTEXT', 'TEXT', 'CHAR', 'VARCHAR',
        'BOOLEAN'
    )
)
WHERE name IN ('columnValuesToBeInSet', 'columnValuesToBeNotInSet');
-- 1. Add generated classificationHash column to support fast lookup and grouping by classification fqnHash
ALTER TABLE tag

View File

@ -171,6 +171,11 @@ CREATE TABLE IF NOT EXISTS entity_deletion_lock (
CREATE INDEX IF NOT EXISTS idx_deletion_lock_fqn ON entity_deletion_lock(entityFqn);
CREATE INDEX IF NOT EXISTS idx_deletion_lock_time ON entity_deletion_lock(lockedAt);
-- Allow BOOLEAN columns for the columnValuesToBeInSet and columnValuesToBeNotInSet
-- test definitions by overwriting their supportedDataTypes array with the full list.
-- Only the supportedDataTypes key is written; no other key of the JSON document is modified.
UPDATE test_definition
SET json = jsonb_set(
    json,
    '{supportedDataTypes}',
    '["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"]'::jsonb
)
WHERE name IN ('columnValuesToBeInSet', 'columnValuesToBeNotInSet');
-- 1. Add classificationHash column to support fast lookup and grouping by classification fqnHash
ALTER TABLE tag
ADD COLUMN classificationHash TEXT

View File

@ -45,6 +45,8 @@ ENTITY_LINK_NAME = "<#E::table::service.db.users::columns::name>"
ENTITY_LINK_USER = "<#E::table::service.db.users>"
ENTITY_LINK_INSERTED_DATE = "<#E::table::service.db.users::columns::inserted_date>"
ENTITY_LINK_EXPECTED_LOCATION = "<#E::table::service.db.users::columns::postal_code>"
ENTITY_LINK_IS_ACTIVE = "<#E::table::service.db.users::columns::is_active>"
TABLE = Table(
id=uuid4(),
@ -61,6 +63,7 @@ TABLE = Table(
Column(name="postal_code", dataType=DataType.INT), # type: ignore
Column(name="lat", dataType=DataType.DECIMAL), # type: ignore
Column(name="lon", dataType=DataType.DECIMAL), # type: ignore
Column(name="is_active", dataType=DataType.BOOLEAN), # type: ignore
],
database=EntityReference(id=uuid4(), name="db", type="database"), # type: ignore
) # type: ignore
@ -78,6 +81,7 @@ class User(Base):
postal_code = sqa.Column(sqa.INT)
lat = sqa.Column(sqa.DECIMAL)
lon = sqa.Column(sqa.DECIMAL)
is_active = sqa.Column(sqa.BOOLEAN)
@pytest.fixture
@ -122,6 +126,7 @@ def create_sqlite_table():
postal_code=60001,
lat=49.6852237,
lon=1.7743058,
is_active=True,
),
User(
name="Jane",
@ -133,6 +138,7 @@ def create_sqlite_table():
postal_code=19005,
lat=45.2589385,
lon=1.4731471,
is_active=False,
),
User(
name="John",
@ -144,6 +150,7 @@ def create_sqlite_table():
postal_code=11008,
lat=42.9974445,
lon=2.2518325,
is_active=None,
),
]
session.add_all(data)
@ -746,3 +753,17 @@ def test_case_column_values_to_be_at_expected_location():
],
computePassedFailedRowCount=True,
) # type: ignore
@pytest.fixture
def test_case_column_value_in_set_boolean():
    """Return a TestCase bound to ENTITY_LINK_IS_ACTIVE with allowedValues "[True, False]".

    The test suite and test definition references get fresh random ids on
    every call, and passed/failed row counting is enabled.
    """
    # The allowed set is passed as its string representation, not as a Python list.
    allowed_values = TestCaseParameterValue(name="allowedValues", value="[True, False]")
    return TestCase(
        name=TEST_CASE_NAME,
        entityLink=ENTITY_LINK_IS_ACTIVE,
        testSuite=EntityReference(id=uuid4(), type="TestSuite"),  # type: ignore
        testDefinition=EntityReference(id=uuid4(), type="TestDefinition"),  # type: ignore
        parameterValues=[allowed_values],
        computePassedFailedRowCount=True,
    )

View File

@ -305,7 +305,7 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
"TABLE",
(
TestCaseResult,
"10",
"11",
None,
TestCaseStatus.Success,
None,
@ -318,7 +318,7 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
"test_case_table_column_count_to_equal",
"tableColumnCountToEqual",
"TABLE",
(TestCaseResult, "10", None, TestCaseStatus.Failed, None, None, None, None),
(TestCaseResult, "11", None, TestCaseStatus.Failed, None, None, None, None),
),
(
"test_case_table_column_name_to_exist",
@ -431,6 +431,12 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
None,
),
),
(
"test_case_column_value_in_set_boolean",
"columnValuesToBeInSet",
"COLUMN",
(TestCaseResult, "20", None, TestCaseStatus.Success, 20.0, 0.0, 66.67, 0.0),
),
],
)
def test_suite_validation_database(

View File

@ -36,6 +36,7 @@ DL_DATA = (
60001,
49.6852237,
1.7743058,
True,
],
[
"2",
@ -48,6 +49,7 @@ DL_DATA = (
19005,
45.2589385,
1.4731471,
False,
],
[
"3",
@ -60,6 +62,7 @@ DL_DATA = (
11008,
42.9974445,
2.2518325,
None,
],
)
@ -77,6 +80,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
"postal_code",
"lat",
"lon",
"is_active",
],
)
@ -403,7 +407,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
"TABLE",
(
TestCaseResult,
"10",
"11",
None,
TestCaseStatus.Success,
None,
@ -416,7 +420,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
"test_case_table_column_count_to_equal",
"tableColumnCountToEqual",
"TABLE",
(TestCaseResult, "10", None, TestCaseStatus.Failed, None, None, None, None),
(TestCaseResult, "11", None, TestCaseStatus.Failed, None, None, None, None),
),
(
"test_case_table_column_name_to_exist",
@ -517,6 +521,21 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
None,
),
),
(
"test_case_column_value_in_set_boolean",
"columnValuesToBeInSet",
"COLUMN",
(
TestCaseResult,
"4000",
None,
TestCaseStatus.Success,
4000.0,
0.0,
66.67,
0.0,
),
),
],
)
def test_suite_validation_datalake(

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
"parameterDefinition": [
{
"name": "allowedValues",

View File

@ -5,7 +5,7 @@
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
"entityType": "COLUMN",
"testPlatforms": ["OpenMetadata"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
"parameterDefinition": [
{
"name": "forbiddenValues",