mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-10 23:05:55 +00:00
* fix(dq): enable "Column values to be in set" test case for boolean columns. Add BOOLEAN to supportedDataTypes array in columnValuesToBeInSet.json to allow boolean column validation with predefined allowed values. This enables users to enforce strict true/false validation on boolean columns directly at the column level, resolving issue #22099. Co-authored-by: IceS2 <IceS2@users.noreply.github.com> * Add tests for the new feature * Add migrations and columnValuesToBeNotInSet --------- Co-authored-by: claude[bot] <209825114+claude[bot]@users.noreply.github.com> Co-authored-by: IceS2 <IceS2@users.noreply.github.com>
This commit is contained in:
parent
1e8e38f2ca
commit
bad772db39
@ -137,6 +137,10 @@ CREATE TABLE IF NOT EXISTS entity_deletion_lock (
|
||||
INDEX idx_deletion_lock_time (lockedAt)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
|
||||
|
||||
-- Update the columnValuesToBeInSet and columnValuesToBeNotInSet test definitions to include BOOLEAN in supportedDataTypes
|
||||
UPDATE test_definition
|
||||
SET json = JSON_SET(json, '$.supportedDataTypes', JSON_ARRAY('NUMBER', 'INT', 'FLOAT', 'DOUBLE', 'DECIMAL', 'TINYINT', 'SMALLINT', 'BIGINT', 'BYTEINT', 'BYTES', 'STRING', 'MEDIUMTEXT', 'TEXT', 'CHAR', 'VARCHAR', 'BOOLEAN'))
|
||||
WHERE name in ('columnValuesToBeInSet', 'columnValuesToBeNotInSet');
|
||||
|
||||
-- 1. Add generated classificationHash column to support fast lookup and grouping by classification fqnHash
|
||||
ALTER TABLE tag
|
||||
|
||||
@ -171,6 +171,11 @@ CREATE TABLE IF NOT EXISTS entity_deletion_lock (
|
||||
CREATE INDEX IF NOT EXISTS idx_deletion_lock_fqn ON entity_deletion_lock(entityFqn);
|
||||
CREATE INDEX IF NOT EXISTS idx_deletion_lock_time ON entity_deletion_lock(lockedAt);
|
||||
|
||||
-- Update the columnValuesToBeInSet and columnValuesToBeNotInSet test definitions to include BOOLEAN in supportedDataTypes
|
||||
UPDATE test_definition
|
||||
SET json = jsonb_set(json, '{supportedDataTypes}', '["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"]'::jsonb)
|
||||
WHERE name in ('columnValuesToBeInSet', 'columnValuesToBeNotInSet');
|
||||
|
||||
-- 1. Add classificationHash column to support fast lookup and grouping by classification fqnHash
|
||||
ALTER TABLE tag
|
||||
ADD COLUMN classificationHash TEXT
|
||||
|
||||
@ -45,6 +45,8 @@ ENTITY_LINK_NAME = "<#E::table::service.db.users::columns::name>"
|
||||
ENTITY_LINK_USER = "<#E::table::service.db.users>"
|
||||
ENTITY_LINK_INSERTED_DATE = "<#E::table::service.db.users::columns::inserted_date>"
|
||||
ENTITY_LINK_EXPECTED_LOCATION = "<#E::table::service.db.users::columns::postal_code>"
|
||||
ENTITY_LINK_IS_ACTIVE = "<#E::table::service.db.users::columns::is_active>"
|
||||
|
||||
|
||||
TABLE = Table(
|
||||
id=uuid4(),
|
||||
@ -61,6 +63,7 @@ TABLE = Table(
|
||||
Column(name="postal_code", dataType=DataType.INT), # type: ignore
|
||||
Column(name="lat", dataType=DataType.DECIMAL), # type: ignore
|
||||
Column(name="lon", dataType=DataType.DECIMAL), # type: ignore
|
||||
Column(name="is_active", dataType=DataType.BOOLEAN), # type: ignore
|
||||
],
|
||||
database=EntityReference(id=uuid4(), name="db", type="database"), # type: ignore
|
||||
) # type: ignore
|
||||
@ -78,6 +81,7 @@ class User(Base):
|
||||
postal_code = sqa.Column(sqa.INT)
|
||||
lat = sqa.Column(sqa.DECIMAL)
|
||||
lon = sqa.Column(sqa.DECIMAL)
|
||||
is_active = sqa.Column(sqa.BOOLEAN)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -122,6 +126,7 @@ def create_sqlite_table():
|
||||
postal_code=60001,
|
||||
lat=49.6852237,
|
||||
lon=1.7743058,
|
||||
is_active=True,
|
||||
),
|
||||
User(
|
||||
name="Jane",
|
||||
@ -133,6 +138,7 @@ def create_sqlite_table():
|
||||
postal_code=19005,
|
||||
lat=45.2589385,
|
||||
lon=1.4731471,
|
||||
is_active=False,
|
||||
),
|
||||
User(
|
||||
name="John",
|
||||
@ -144,6 +150,7 @@ def create_sqlite_table():
|
||||
postal_code=11008,
|
||||
lat=42.9974445,
|
||||
lon=2.2518325,
|
||||
is_active=None,
|
||||
),
|
||||
]
|
||||
session.add_all(data)
|
||||
@ -746,3 +753,17 @@ def test_case_column_values_to_be_at_expected_location():
|
||||
],
|
||||
computePassedFailedRowCount=True,
|
||||
) # type: ignore
|
||||
|
||||
|
||||
@pytest.fixture
def test_case_column_value_in_set_boolean():
    """Return a ``TestCase`` pointed at the ``is_active`` column entity link.

    The case carries a single ``allowedValues`` parameter whose value is the
    string ``"[True, False]"`` and asks for passed/failed row counts to be
    computed.
    """
    # References get fresh random ids on every fixture invocation.
    suite_ref = EntityReference(id=uuid4(), type="TestSuite")  # type: ignore
    definition_ref = EntityReference(id=uuid4(), type="TestDefinition")  # type: ignore

    allowed_values = TestCaseParameterValue(
        name="allowedValues",
        value="[True, False]",
    )

    return TestCase(
        name=TEST_CASE_NAME,
        entityLink=ENTITY_LINK_IS_ACTIVE,
        testSuite=suite_ref,
        testDefinition=definition_ref,
        parameterValues=[allowed_values],
        computePassedFailedRowCount=True,
    )  # type: ignore
|
||||
|
||||
@ -305,7 +305,7 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
"TABLE",
|
||||
(
|
||||
TestCaseResult,
|
||||
"10",
|
||||
"11",
|
||||
None,
|
||||
TestCaseStatus.Success,
|
||||
None,
|
||||
@ -318,7 +318,7 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
"test_case_table_column_count_to_equal",
|
||||
"tableColumnCountToEqual",
|
||||
"TABLE",
|
||||
(TestCaseResult, "10", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
(TestCaseResult, "11", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
),
|
||||
(
|
||||
"test_case_table_column_name_to_exist",
|
||||
@ -431,6 +431,12 @@ EXECUTION_DATE = datetime.strptime("2021-07-03", "%Y-%m-%d")
|
||||
None,
|
||||
),
|
||||
),
|
||||
(
|
||||
"test_case_column_value_in_set_boolean",
|
||||
"columnValuesToBeInSet",
|
||||
"COLUMN",
|
||||
(TestCaseResult, "20", None, TestCaseStatus.Success, 20.0, 0.0, 66.67, 0.0),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_suite_validation_database(
|
||||
|
||||
@ -36,6 +36,7 @@ DL_DATA = (
|
||||
60001,
|
||||
49.6852237,
|
||||
1.7743058,
|
||||
True,
|
||||
],
|
||||
[
|
||||
"2",
|
||||
@ -48,6 +49,7 @@ DL_DATA = (
|
||||
19005,
|
||||
45.2589385,
|
||||
1.4731471,
|
||||
False,
|
||||
],
|
||||
[
|
||||
"3",
|
||||
@ -60,6 +62,7 @@ DL_DATA = (
|
||||
11008,
|
||||
42.9974445,
|
||||
2.2518325,
|
||||
None,
|
||||
],
|
||||
)
|
||||
|
||||
@ -77,6 +80,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
|
||||
"postal_code",
|
||||
"lat",
|
||||
"lon",
|
||||
"is_active",
|
||||
],
|
||||
)
|
||||
|
||||
@ -403,7 +407,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
|
||||
"TABLE",
|
||||
(
|
||||
TestCaseResult,
|
||||
"10",
|
||||
"11",
|
||||
None,
|
||||
TestCaseStatus.Success,
|
||||
None,
|
||||
@ -416,7 +420,7 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
|
||||
"test_case_table_column_count_to_equal",
|
||||
"tableColumnCountToEqual",
|
||||
"TABLE",
|
||||
(TestCaseResult, "10", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
(TestCaseResult, "11", None, TestCaseStatus.Failed, None, None, None, None),
|
||||
),
|
||||
(
|
||||
"test_case_table_column_name_to_exist",
|
||||
@ -517,6 +521,21 @@ DATALAKE_DATA_FRAME = lambda times_increase_sample_data: DataFrame(
|
||||
None,
|
||||
),
|
||||
),
|
||||
(
|
||||
"test_case_column_value_in_set_boolean",
|
||||
"columnValuesToBeInSet",
|
||||
"COLUMN",
|
||||
(
|
||||
TestCaseResult,
|
||||
"4000",
|
||||
None,
|
||||
TestCaseStatus.Success,
|
||||
4000.0,
|
||||
0.0,
|
||||
66.67,
|
||||
0.0,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_suite_validation_datalake(
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
"description": "This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.",
|
||||
"entityType": "COLUMN",
|
||||
"testPlatforms": ["OpenMetadata"],
|
||||
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
|
||||
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
|
||||
"parameterDefinition": [
|
||||
{
|
||||
"name": "allowedValues",
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
"description": "This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. ",
|
||||
"entityType": "COLUMN",
|
||||
"testPlatforms": ["OpenMetadata"],
|
||||
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR"],
|
||||
"supportedDataTypes": ["NUMBER", "INT", "FLOAT", "DOUBLE", "DECIMAL", "TINYINT", "SMALLINT", "BIGINT", "BYTEINT", "BYTES", "STRING", "MEDIUMTEXT", "TEXT", "CHAR", "VARCHAR", "BOOLEAN"],
|
||||
"parameterDefinition": [
|
||||
{
|
||||
"name": "forbiddenValues",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user