Fixes 16305: Added Test Case for Matching Enum (#16362)

* Added Test Case for Matching Enum

1. Implemented the test case using the `matchEnum` parameter.
2. Added integration tests.
3. Added migrations.

* fix tests

* fixed tests

* format

* fixed tests

* clear search cache before running ingestion

* format

* changed scope of aws fixture

* moved migrations to 1.5.0
This commit is contained in:
Imri Paran 2024-05-28 09:30:30 +02:00 committed by GitHub
parent d909a3141e
commit a4c516d2c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
63 changed files with 550 additions and 869 deletions

View File

@ -0,0 +1,11 @@
-- matchEnum Test Definition Parameter for columnValuesToBeInSet
-- MySQL migration: append the boolean "matchEnum" parameter to the test
-- definition's parameterDefinition array. The adjacent single-quoted string
-- literals below are concatenated by MySQL into one JSON document before
-- JSON_MERGE_PRESERVE merges it into the stored json column.
UPDATE test_definition
set json = JSON_MERGE_PRESERVE(
json,
'{"parameterDefinition": ['
'{"name": "matchEnum", "displayName": "Match enum", "description": "If enabled, validate that each value independently matches the enum.", "dataType": "BOOLEAN", "required": false, "optionValues": []}'
']}'
)
-- Idempotency guard: only rows that still have fewer than 2 parameters are
-- touched, so re-running the migration does not append a duplicate entry.
WHERE name = 'columnValuesToBeInSet'
AND JSON_LENGTH(json, '$.parameterDefinition') < 2;

View File

@ -0,0 +1,8 @@
-- matchEnum Test Definition Parameter for columnValuesToBeInSet
-- PostgreSQL variant of the MySQL migration: replace the parameterDefinition
-- array with (existing array || new matchEnum parameter object). The adjacent
-- string literals on consecutive lines are concatenated by PostgreSQL into a
-- single literal before the ::jsonb cast.
UPDATE test_definition
SET json = jsonb_set(json, '{parameterDefinition}', json->'parameterDefinition' || '['
'{"name": "matchEnum", "displayName": "Match enum", "description": "If enabled, validate that each value independently matches the enum.", "dataType": "BOOLEAN", "required": false, "optionValues": []}'
']'::jsonb
)
-- Idempotency guard: skip rows that already carry both parameters.
WHERE name = 'columnValuesToBeInSet'
AND JSONB_ARRAY_LENGTH(json->'parameterDefinition') < 2;

View File

@ -50,9 +50,21 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
literal_eval,
)
match_enum = self.get_test_case_param_value(
self.test_case.parameterValues, # type: ignore
"matchEnum",
bool,
default=False,
)
try:
column: Union[SQALikeColumn, Column] = self._get_column_name()
res = self._run_results(Metrics.COUNT_IN_SET, column, values=allowed_values)
if match_enum:
count = self._run_results(
Metrics.ROW_COUNT, column, values=allowed_values
)
res = count - res
except (ValueError, RuntimeError) as exc:
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
logger.debug(traceback.format_exc())
@ -71,7 +83,7 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
return self.get_test_case_result_object(
self.execution_date,
self.get_test_case_status(res >= 1),
self.get_test_case_status(res == 0 if match_enum else res >= 1),
f"Found countInSet={res}.",
[TestResultValue(name=ALLOWED_VALUE_COUNT, value=str(res))],
row_count=row_count,

View File

@ -25,6 +25,9 @@ class LRUCache:
self._cache = OrderedDict()
self.capacity = capacity
def clear(self):
    """Drop every cached entry; the configured capacity is unchanged."""
    # Rebinding (rather than OrderedDict.clear) also detaches any external
    # references to the old mapping from this cache instance.
    self._cache = OrderedDict()
def get(self, key):
"""
Returns the value associated to `key` if it exists,

View File

@ -0,0 +1,216 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Datalake ingestion integration tests"""
import os
from copy import deepcopy
import boto3
import pytest
from moto import mock_s3
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.workflow.data_quality import TestSuiteWorkflow
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
BUCKET_NAME = "MyBucket"
INGESTION_CONFIG = {
"source": {
"type": "datalake",
"serviceName": "datalake_for_integration_tests",
"serviceConnection": {
"config": {
"type": "Datalake",
"configSource": {
"securityConfig": {
"awsAccessKeyId": "fake_access_key",
"awsSecretAccessKey": "fake_secret_key",
"awsRegion": "us-weat-1",
}
},
"bucketName": f"{BUCKET_NAME}",
}
},
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
DATA_QUALITY_CONFIG = {
"source": {
"type": "datalake",
"serviceName": "datalake_for_integration_tests",
"serviceConnection": {
"config": {
"type": "Datalake",
"configSource": {
"securityConfig": {
"awsAccessKeyId": "fake_access_key",
"awsSecretAccessKey": "fake_secret_key",
"awsRegion": "us-weat-1",
}
},
"bucketName": f"{BUCKET_NAME}",
}
},
"sourceConfig": {
"config": {
"type": "TestSuite",
"entityFullyQualifiedName": 'datalake_for_integration_tests.default.MyBucket."users.csv"',
}
},
},
"processor": {
"type": "orm-test-runner",
"config": {
"testCases": [
{
"name": "first_name_includes_john",
"testDefinitionName": "columnValuesToBeInSet",
"columnName": "first_name",
"parameterValues": [
{
"name": "allowedValues",
"value": "['John']",
}
],
},
{
"name": "first_name_is_john",
"testDefinitionName": "columnValuesToBeInSet",
"columnName": "first_name",
"parameterValues": [
{
"name": "allowedValues",
"value": "['John']",
},
{
"name": "matchEnum",
"value": "True",
},
],
},
]
},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"loggerLevel": "DEBUG",
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
},
},
}
@pytest.fixture(scope="module", autouse=True)
def aws():
    """Module-wide moto S3 mock; yields a fake boto3 S3 client.

    autouse guarantees every test in this module runs against the mocked
    backend instead of real AWS.
    """
    with mock_s3():
        mocked_client = boto3.client("s3", region_name="us-east-1")
        yield mocked_client
@pytest.fixture(scope="class", autouse=True)
def setup_s3(request) -> None:
    """Create the mocked S3 bucket, upload the ./resources files, then tear down.

    Stores the client and uploaded keys on the test class (request.cls) so
    class-based tests can reach them.
    """
    # Mock our S3 bucket and ingest a file
    boto3.DEFAULT_SESSION = None
    request.cls.s3_client = boto3.client(
        "s3",
        # NOTE(review): "us-weat-1" looks like a typo for "us-west-1" but it
        # matches the awsRegion in INGESTION_CONFIG — confirm before changing.
        region_name="us-weat-1",
    )
    s3 = boto3.resource(
        "s3",
        region_name="us-west-1",
        aws_access_key_id="fake_access_key",
        aws_secret_access_key="fake_secret_key",
    )
    request.cls.s3_client.create_bucket(
        Bucket=BUCKET_NAME,
        CreateBucketConfiguration={"LocationConstraint": "us-west-1"},
    )
    # Sanity check that the bucket is visible through the mocked resource.
    s3.meta.client.head_bucket(Bucket=BUCKET_NAME)
    # Upload every file found under ./resources, keyed by its relative path.
    current_dir = os.path.dirname(__file__)
    resources_dir = os.path.join(current_dir, "resources")
    resources_paths = [
        os.path.join(path, filename)
        for path, _, files in os.walk(resources_dir)
        for filename in files
    ]
    request.cls.s3_keys = []
    for path in resources_paths:
        key = os.path.relpath(path, resources_dir)
        request.cls.s3_keys.append(key)
        request.cls.s3_client.upload_file(Filename=path, Bucket=BUCKET_NAME, Key=key)
    yield
    # Teardown: a bucket must be emptied before it can be deleted.
    bucket = s3.Bucket(BUCKET_NAME)
    for key in bucket.objects.all():
        key.delete()
    bucket.delete()
@pytest.fixture(scope="class")
def run_ingestion(metadata):
    """Run datalake metadata ingestion; tear the created service down afterwards."""
    workflow = MetadataWorkflow.create(INGESTION_CONFIG)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()
    yield
    # Teardown: hard-delete the service (recursively) so later tests start clean.
    service = metadata.get_by_name(
        entity=DatabaseService, fqn="datalake_for_integration_tests"
    )
    metadata.delete(DatabaseService, service.id, recursive=True, hard_delete=True)
@pytest.fixture
def run_test_suite_workflow(run_ingestion):
    """Execute the data-quality test suite once metadata ingestion has run."""
    workflow = TestSuiteWorkflow.create(DATA_QUALITY_CONFIG)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()
@pytest.fixture()
def run_profiler(run_ingestion):
    """Test profiler ingestion"""
    # Reuse the metadata ingestion config, switching the source to Profiler
    # and attaching the orm-profiler processor.
    profiler_config = deepcopy(INGESTION_CONFIG)
    profiler_config["source"]["sourceConfig"]["config"].update({"type": "Profiler"})
    profiler_config["processor"] = {"type": "orm-profiler", "config": {}}
    workflow = ProfilerWorkflow.create(profiler_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()

View File

@ -0,0 +1,27 @@
from typing import List
import pytest
from metadata.generated.schema.tests.basic import TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCase
class TestDataQuality:
    """Checks the statuses produced by the datalake test-suite workflow."""

    @pytest.mark.parametrize(
        "test_case_name,expected_status",
        [
            ("first_name_includes_john", TestCaseStatus.Success),
            ("first_name_is_john", TestCaseStatus.Failed),
        ],
    )
    def test_data_quality(
        self, run_test_suite_workflow, metadata, test_case_name, expected_status
    ):
        """Each executed test case must end with its expected status."""
        all_cases: List[TestCase] = metadata.list_entities(
            TestCase, fields=["*"], skip_on_failure=True
        ).entities
        matching: TestCase = next(
            (case for case in all_cases if case.name.__root__ == test_case_name),
            None,
        )
        assert matching is not None
        assert matching.testCaseResult.testCaseStatus == expected_status

View File

@ -11,137 +11,35 @@
"""Datalake ingestion integration tests"""
import os
from copy import deepcopy
from unittest import TestCase
import boto3
import botocore
import pytest
from moto import mock_s3
from metadata.generated.schema.entity.data.table import DataType, Table
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.ingestion.ometa.models import EntityList
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
BUCKET_NAME = "MyBucket"
INGESTION_CONFIG = {
"source": {
"type": "datalake",
"serviceName": "datalake_for_integration_tests",
"serviceConnection": {
"config": {
"type": "Datalake",
"configSource": {
"securityConfig": {
"awsAccessKeyId": "fake_access_key",
"awsSecretAccessKey": "fake_secret_key",
"awsRegion": "us-weat-1",
}
},
"bucketName": f"{BUCKET_NAME}",
}
},
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
@mock_s3
class DatalakeTestE2E(TestCase):
class TestDatalake:
"""datalake profiler E2E test"""
@classmethod
def setUpClass(cls) -> None:
server_config = OpenMetadataConnection(
hostPort="http://localhost:8585/api",
authProvider="openmetadata",
securityConfig=OpenMetadataJWTClientConfig(
jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
),
) # type: ignore
cls.metadata = OpenMetadata(server_config)
metadata: OpenMetadata = None
s3_client = None
def setUp(self) -> None:
# Mock our S3 bucket and ingest a file
boto3.DEFAULT_SESSION = None
self.client = boto3.client(
"s3",
region_name="us-weat-1",
)
# check that we are not running our test against a real bucket
try:
s3 = boto3.resource(
"s3",
region_name="us-west-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
s3.meta.client.head_bucket(Bucket=BUCKET_NAME)
except botocore.exceptions.ClientError:
pass
else:
err = f"{BUCKET_NAME} should not exist."
raise EnvironmentError(err)
self.client.create_bucket(
Bucket=BUCKET_NAME,
CreateBucketConfiguration={"LocationConstraint": "us-west-1"},
)
current_dir = os.path.dirname(__file__)
resources_dir = os.path.join(current_dir, "resources")
resources_paths = [
os.path.join(path, filename)
for path, _, files in os.walk(resources_dir)
for filename in files
]
self.s3_keys = []
for path in resources_paths:
key = os.path.relpath(path, resources_dir)
self.s3_keys.append(key)
self.client.upload_file(Filename=path, Bucket=BUCKET_NAME, Key=key)
@pytest.fixture(autouse=True)
def set_metdata(self, metadata):
self.metadata = metadata
@pytest.mark.order(10000)
def test_ingestion(self):
def test_ingestion(self, run_ingestion):
"""test ingestion of datalake data"""
# Ingest our S3 data
ingestion_workflow = MetadataWorkflow.create(INGESTION_CONFIG)
ingestion_workflow.execute()
ingestion_workflow.raise_from_status()
ingestion_workflow.stop()
resp: EntityList[Table] = self.metadata.list_entities(
entity=Table, params={"database": "datalake_for_integration_tests.default"}
) # type: ignore
entities = resp.entities
self.assertEqual(len(entities), 3)
assert len(entities) == 3
names = [entity.name.__root__ for entity in entities]
self.assertListEqual(
sorted(["names.json", "new_users.parquet", "users.csv"]), sorted(names)
)
assert sorted(["names.json", "new_users.parquet", "users.csv"]) == sorted(names)
for entity in entities:
columns = entity.columns
@ -149,27 +47,7 @@ class DatalakeTestE2E(TestCase):
if column.dataType == DataType.JSON:
assert column.children
@pytest.mark.order(10001)
def test_profiler(self):
"""Test profiler ingestion"""
workflow_config = deepcopy(INGESTION_CONFIG)
workflow_config["source"]["sourceConfig"]["config"].update(
{
"type": "Profiler",
}
)
workflow_config["processor"] = {
"type": "orm-profiler",
"config": {},
}
profiler_workflow = ProfilerWorkflow.create(workflow_config)
profiler_workflow.execute()
status = profiler_workflow.result_status()
profiler_workflow.stop()
assert status == 0
def test_profiler(self, run_profiler):
csv_ = self.metadata.get_by_name(
entity=Table,
fqn='datalake_for_integration_tests.default.MyBucket."users.csv"',

View File

@ -1,11 +1,55 @@
import contextlib
import logging
import os
import tarfile
import zipfile
from subprocess import CalledProcessError
import docker
import pytest
from testcontainers.postgres import PostgresContainer
from metadata.generated.schema.api.services.createDatabaseService import (
CreateDatabaseServiceRequest,
)
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
BasicAuth,
)
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
from metadata.generated.schema.entity.services.databaseService import (
DatabaseConnection,
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
Sink,
Source,
SourceConfig,
WorkflowConfig,
)
from metadata.ingestion.lineage.sql_lineage import search_cache
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
@pytest.fixture(autouse=True, scope="session")
def config_logging():
    """Silence sqlfluff's verbose logger for the entire test session."""
    logging.getLogger("sqlfluff").setLevel(logging.CRITICAL)
@contextlib.contextmanager
def try_bind(container, container_port, host_port):
    """Bind container_port to host_port, falling back to a random free port.

    If docker rejects the requested host port (already in use), retry with
    ``None`` so docker assigns any available port instead of failing the run.
    """
    try:
        with container.with_bind_ports(container_port, host_port) as container:
            yield container
    except docker.errors.APIError:
        logging.warning("Port %s is already in use, trying another port", host_port)
        with container.with_bind_ports(container_port, None) as container:
            yield container
@pytest.fixture(scope="session")
def postgres_container(tmp_path_factory):
@ -25,7 +69,9 @@ def postgres_container(tmp_path_factory):
"pg_stat_statements.track=all",
]
with container as container:
with try_bind(container, 5432, 5432) if not os.getenv(
"CI"
) else container as container:
docker_container = container.get_wrapped_container()
docker_container.exec_run(["mkdir", "/data"])
docker_container.put_archive(
@ -57,3 +103,47 @@ def postgres_container(tmp_path_factory):
returncode=res[0], cmd=res, output=res[1].decode("utf-8")
)
yield container
@pytest.fixture(scope="module")
def db_service(metadata, postgres_container):
    """Register a Postgres DatabaseService pointing at the test container.

    Yields the created service entity; teardown hard-deletes it recursively.
    """
    exposed_port = postgres_container.get_exposed_port(postgres_container.port)
    create_request = CreateDatabaseServiceRequest(
        name="docker_test_db",
        serviceType=DatabaseServiceType.Postgres,
        connection=DatabaseConnection(
            config=PostgresConnection(
                username=postgres_container.username,
                authType=BasicAuth(password=postgres_container.password),
                hostPort="localhost:" + exposed_port,
                database="dvdrental",
            )
        ),
    )
    service_entity = metadata.create_or_update(data=create_request)
    # Presumably the server masks the secret on the returned entity; restore it
    # so downstream fixtures can connect — TODO confirm against the SDK.
    service_entity.connection.config.authType.password = postgres_container.password
    yield service_entity
    metadata.delete(
        DatabaseService, service_entity.id, recursive=True, hard_delete=True
    )
@pytest.fixture(scope="module")
def ingest_metadata(db_service, metadata: OpenMetadata):
    """Run a metadata ingestion workflow against the module's db_service."""
    config = OpenMetadataWorkflowConfig(
        source=Source(
            type=db_service.connection.config.type.value.lower(),
            serviceName=db_service.fullyQualifiedName.__root__,
            serviceConnection=db_service.connection,
            sourceConfig=SourceConfig(config={}),
        ),
        sink=Sink(type="metadata-rest", config={}),
        workflowConfig=WorkflowConfig(openMetadataServerConfig=metadata.config),
    )
    workflow = MetadataWorkflow.create(config)
    # Clear the lineage search cache so earlier test runs cannot leak results in.
    search_cache.clear()
    workflow.execute()
    workflow.raise_from_status()

View File

@ -0,0 +1,126 @@
import sys
from typing import Collection, List, Tuple
import pytest
from metadata.data_quality.api.models import (
TestCaseDefinition,
TestCaseParameterValue,
TestSuiteProcessorConfig,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
TestSuiteConfigType,
TestSuitePipeline,
)
from metadata.generated.schema.metadataIngestion.workflow import (
LogLevels,
OpenMetadataWorkflowConfig,
Processor,
Sink,
Source,
SourceConfig,
WorkflowConfig,
)
from metadata.generated.schema.tests.basic import TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCase
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.data_quality import TestSuiteWorkflow
if not sys.version_info >= (3, 9):
pytest.skip("requires python 3.9+", allow_module_level=True)
def parameteres_from_tuples(
    tup: Collection[Tuple[str, str]]
) -> List[TestCaseParameterValue]:
    """Build ``TestCaseParameterValue`` objects from ``(name, value)`` pairs."""
    # NOTE: the misspelled name is kept — callers in this module reference it.
    return [TestCaseParameterValue(name=pair[0], value=pair[1]) for pair in tup]
@pytest.fixture(scope="module")
def run_data_quality_workflow(
    ingest_metadata, db_service: DatabaseService, metadata: OpenMetadata
):
    """Run a columnValuesToBeInSet test suite against dvdrental.public.customer.

    Exercises three variants: no matchEnum, an empty matchEnum value, and
    matchEnum enabled (expected to fail on the customer table).
    """
    workflow_config = OpenMetadataWorkflowConfig(
        source=Source(
            type=TestSuiteConfigType.TestSuite.value,
            serviceName="MyTestSuite",
            sourceConfig=SourceConfig(
                config=TestSuitePipeline(
                    type=TestSuiteConfigType.TestSuite,
                    entityFullyQualifiedName=f"{db_service.fullyQualifiedName.__root__}.dvdrental.public.customer",
                )
            ),
            serviceConnection=db_service.connection,
        ),
        processor=Processor(
            type="orm-test-runner",
            config=TestSuiteProcessorConfig(
                testCases=[
                    # Plain membership check: matchEnum not supplied at all.
                    TestCaseDefinition(
                        name="first_name_includes_tom_and_jerry_wo_enum",
                        testDefinitionName="columnValuesToBeInSet",
                        columnName="first_name",
                        parameterValues=parameteres_from_tuples(
                            [
                                ("allowedValues", "['Tom', 'Jerry']"),
                            ]
                        ),
                    ),
                    # matchEnum present but empty — presumably treated as
                    # disabled by the validator; TODO confirm.
                    TestCaseDefinition(
                        name="first_name_includes_tom_and_jerry",
                        testDefinitionName="columnValuesToBeInSet",
                        columnName="first_name",
                        parameterValues=parameteres_from_tuples(
                            [
                                ("allowedValues", "['Tom', 'Jerry']"),
                                ("matchEnum", ""),
                            ]
                        ),
                    ),
                    # matchEnum enabled: every row must be in the allowed set.
                    TestCaseDefinition(
                        name="first_name_is_tom_or_jerry",
                        testDefinitionName="columnValuesToBeInSet",
                        columnName="first_name",
                        parameterValues=parameteres_from_tuples(
                            [
                                ("allowedValues", "['Tom', 'Jerry']"),
                                ("matchEnum", "True"),
                            ]
                        ),
                    ),
                ]
            ),
        ),
        sink=Sink(
            type="metadata-rest",
            config={},
        ),
        workflowConfig=WorkflowConfig(
            loggerLevel=LogLevels.DEBUG, openMetadataServerConfig=metadata.config
        ),
    )
    test_suite_processor = TestSuiteWorkflow.create(workflow_config)
    test_suite_processor.execute()
    test_suite_processor.raise_from_status()
    # Fix: release workflow resources, matching the other workflow fixtures
    # in this test suite (the original never called stop()).
    test_suite_processor.stop()
@pytest.mark.parametrize(
    "test_case_name,expected_status",
    [
        ("first_name_includes_tom_and_jerry_wo_enum", TestCaseStatus.Success),
        ("first_name_includes_tom_and_jerry", TestCaseStatus.Success),
        ("first_name_is_tom_or_jerry", TestCaseStatus.Failed),
    ],
)
def test_data_quality(
    run_data_quality_workflow, metadata: OpenMetadata, test_case_name, expected_status
):
    """Each executed test case must finish with its expected status."""
    entities: List[TestCase] = metadata.list_entities(
        TestCase, fields=["*"], skip_on_failure=True
    ).entities
    matching: TestCase = next(
        (case for case in entities if case.name.__root__ == test_case_name),
        None,
    )
    assert matching is not None
    assert matching.testCaseResult.testCaseStatus == expected_status

View File

@ -1,4 +1,3 @@
import logging
import sys
import pytest
@ -36,6 +35,7 @@ from metadata.generated.schema.metadataIngestion.workflow import (
SourceConfig,
WorkflowConfig,
)
from metadata.ingestion.lineage.sql_lineage import search_cache
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.profiler.api.models import ProfilerProcessorConfig
from metadata.workflow.metadata import MetadataWorkflow
@ -46,11 +46,6 @@ if not sys.version_info >= (3, 9):
pytest.skip("requires python 3.9+", allow_module_level=True)
@pytest.fixture(autouse=True)
def config_logging():
logging.getLogger("sqlfluff").setLevel(logging.CRITICAL)
@pytest.fixture(scope="module")
def db_service(metadata, postgres_container):
service = CreateDatabaseServiceRequest(
@ -136,6 +131,7 @@ def run_profiler_workflow(ingest_metadata, db_service, metadata):
),
)
metadata_ingestion = ProfilerWorkflow.create(workflow_config.dict())
search_cache.clear()
metadata_ingestion.execute()
return
@ -171,6 +167,7 @@ def ingest_query_usage(ingest_metadata, db_service, metadata):
},
}
workflow = UsageWorkflow.create(workflow_config)
search_cache.clear()
workflow.execute()
workflow.raise_from_status()
return

View File

@ -68,7 +68,7 @@ class TestSecretsManagerFactory(TestCase):
@patch.dict(os.environ, {"AZURE_KEY_VAULT_NAME": "test"})
@patch("metadata.clients.aws_client.boto3")
def test_all_providers_has_implementation(self, mocked_boto3):
mocked_boto3.client.return_value = {}
mocked_boto3.s3_client.return_value = {}
secret_manager_providers = [
secret_manager_provider
for secret_manager_provider in SecretsManagerProvider

View File

@ -1,16 +0,0 @@
---
title: columnValueMaxToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemaxtobebetween
---
# columnValueMaxToBeBetween
*This schema defines the test ColumnValueMaxToBeBetween. Test the maximum value in a col is within a range.*
## Properties
- **`minValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be greater or equal than.
- **`maxValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMeanToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemeantobebetween
---
# columnValueMeanToBeBetween
*This schema defines the test ColumnValueMeanToBeBetween. Test the mean value in a col is within a range.*
## Properties
- **`minValueForMeanInCol`** *(integer)*: Expected mean value for the column to be greater or equal than.
- **`maxValueForMeanInCol`** *(integer)*: Expected mean value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMedianToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemediantobebetween
---
# columnValueMedianToBeBetween
*This schema defines the test ColumnValueMedianToBeBetween. Test the median value in a col is within a range.*
## Properties
- **`minValueForMedianInCol`** *(integer)*: Expected median value for the column to be greater or equal than.
- **`maxValueForMedianInCol`** *(integer)*: Expected median value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMinToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemintobebetween
---
# columnValueMinToBeBetween
*This schema defines the test ColumnValueMinToBeBetween. Test the minimum value in a col is within a range.*
## Properties
- **`minValueForMinInCol`** *(integer)*: Expected minimum value in the column to be greater or equal than.
- **`maxValueForMinInCol`** *(integer)*: Expected minimum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueStdDevToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestddevtobebetween
---
# columnValueStdDevToBeBetween
*This schema defines the test ColumnValueStdDevToBeBetween. Test the std. dev. value in a col is within a range.*
## Properties
- **`minValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be greater or equal than.
- **`maxValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesLengthsToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvalueslengthstobebetween
---
# columnValueLengthsToBeBetween
*This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. *
## Properties
- **`minLength`** *(integer)*: The {minLength} for the column length. If minLength is not included, maxLength is treated as upperBound and there will be no minimum number of rows.
- **`maxLength`** *(integer)*: The {maxLength} for the column length. If maxLength is not included, minLength is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,17 +0,0 @@
---
title: columnValuesMissingCountToBeEqual
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluesmissingcounttobeequal
---
# columnValuesMissingCount
*This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. *
## Properties
- **`missingCountValue`** *(integer)*: No.of missing values to be equal to.
- **`missingValueMatch`**: By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.
- **Items** *(string)*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesSumToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluessumtobebetween
---
# columnValuesSumToBeBetween
*This schema defines the test ColumnValuesSumToBeBetween. Test the sum of the values of a col is within a range.*
## Properties
- **`minValueForColSum`** *(integer)*: Expected sum of values in the column to be greater or equal than.
- **`maxValueForColSum`** *(integer)*: Expected sum values in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobebetween
---
# columnValuesToBeBetween
*This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. *
## Properties
- **`minValue`** *(integer)*: The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: The {maxValue} value for the column entry. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeinset
---
# ColumnValuesToBeInSet
*This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.*
## Properties
- **`allowedValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeNotInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotinset
---
# columnValuesToBeNotInSet
*This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. *
## Properties
- **`forbiddenValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeNotNull
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotnull
---
# columnValuesToBeNotNull
*This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. *
## Properties
- **`columnValuesToBeNotNull`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeUnique
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeunique
---
# columnValuesToBeUnique
*This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. *
## Properties
- **`columnValuesToBeUnique`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestomatchregex
---
# columnValuesToMatchRegex
*This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. *
## Properties
- **`regex`** *(string)*: The regular expression the column entries should match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToNotMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestonotmatchregex
---
# columnValuesToNotMatchRegex
*This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. *
## Properties
- **`forbiddenRegex`** *(string)*: The regular expression the column entries should not match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/column
---
# column
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/table
---
# table
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttobebetween
---
# TableColumnCountToBeBetween
*This schema defines the test TableColumnCountToBeBetween. Test the number of columns to be between min max value.*
## Properties
- **`minColValue`** *(integer)*: Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column.
- **`maxColValue`** *(integer)*: Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttoequal
---
# TableColumnCountToEqual
*This schema defines the test TableColumnCountToEqual. Test the number of columns equal to a value.*
## Properties
- **`columnCount`** *(integer)*: Expected number of columns to equal to a {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnNameToExist
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumnnametoexist
---
# TableColumnNameToExist
*This schema defines the test TableColumnNameToExist. Test the table columns exists in the table.*
## Properties
- **`columnName`** *(string)*: Expected column of the table to exist.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnToMatchSet
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumntomatchset
---
# TableColumnToMatchSet
*This schema defines the test TableColumnToMatchSet. Test the table columns match a set of values. Unordered by default.*
## Properties
- **`columnNames`** *(string)*: Expected columns of the table to match the ones in {columnValuesSet}.
- **`ordered`** *(boolean)*: Whether or not to consider the order of the list when performing the match. Default: `False`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableCustomSQLQuery
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecustomsqlquery
---
# tableCustomSQLQuery
*This schema defines the test TableCustomSQLQuery. Test if a custom SQL returns 1 or 0 row.*
## Properties
- **`sqlExpression`** *(string)*: SQL expression to run against the table.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableRowCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttobebetween
---
# TableRowCountToBeBetween
*This schema defines the test TableRowCountToBeBetween. Test the number of rows to be between two values.*
## Properties
- **`minValue`** *(integer)*: Expected number of rows should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: Expected number of rows should be lower than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableRowCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttoequal
---
# TableRowCountToEqual
*This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.*
## Properties
- **`value`** *(integer)*: Expected number of rows {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMaxToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemaxtobebetween
---
# columnValueMaxToBeBetween
*This schema defines the test ColumnValueMaxToBeBetween. Test the maximum value in a col is within a range.*
## Properties
- **`minValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be greater or equal than.
- **`maxValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMeanToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemeantobebetween
---
# columnValueMeanToBeBetween
*This schema defines the test ColumnValueMeanToBeBetween. Test the mean value in a col is within a range.*
## Properties
- **`minValueForMeanInCol`** *(integer)*: Expected mean value for the column to be greater or equal than.
- **`maxValueForMeanInCol`** *(integer)*: Expected mean value for the column to be greater or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMedianToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemediantobebetween
---
# columnValueMedianToBeBetween
*This schema defines the test ColumnValueMedianToBeBetween. Test the median value in a col is within a range.*
## Properties
- **`minValueForMedianInCol`** *(integer)*: Expected median value for the column to be greater or equal than.
- **`maxValueForMedianInCol`** *(integer)*: Expected median value for the column to be greater or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMinToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemintobebetween
---
# columnValueMinToBeBetween
*This schema defines the test ColumnValueMinToBeBetween. Test the minimum value in a col is within a range.*
## Properties
- **`minValueForMinInCol`** *(integer)*: Expected minimum value in the column to be greater or equal than.
- **`maxValueForMinInCol`** *(integer)*: Expect minimum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueStdDevToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestddevtobebetween
---
# columnValueStdDevToBeBetween
*This schema defines the test ColumnValueStdDevToBeBetween. Test the std. dev. value in a col is within a range.*
## Properties
- **`minValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be greater or equal than.
- **`maxValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be greater or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesLengthsToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvalueslengthstobebetween
---
# columnValueLengthsToBeBetween
*This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. *
## Properties
- **`minLength`** *(integer)*: The {minLength} for the column length. If minLength is not included, maxLength is treated as upperBound and there will be no minimum number of rows.
- **`maxLength`** *(integer)*: The {maxLength} for the column length. if maxLength is not included, minLength is treated as lowerBound and there will eb no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,17 +0,0 @@
---
title: columnValuesMissingCountToBeEqual
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluesmissingcounttobeequal
---
# columnValuesMissingCount
*This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. *
## Properties
- **`missingCountValue`** *(integer)*: No. of missing values to be equal to.
- **`missingValueMatch`**: By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.
- **Items** *(string)*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesSumToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluessumtobebetween
---
# columnValuesSumToBeBetween
*This schema defines the test ColumnValuesSumToBeBetween. Test the sum of the values of a col is within a range.*
## Properties
- **`minValueForColSum`** *(integer)*: Expected sum of values in the column to be greater or equal than.
- **`maxValueForColSum`** *(integer)*: Expected sum values in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobebetween
---
# columnValuesToBeBetween
*This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. *
## Properties
- **`minValue`** *(integer)*: The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: The {maxValue} value for the column entry. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeinset
---
# ColumnValuesToBeInSet
*This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.*
## Properties
- **`allowedValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeNotInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotinset
---
# columnValuesToBeNotInSet
*This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. *
## Properties
- **`forbiddenValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeNotNull
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotnull
---
# columnValuesToBeNotNull
*This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. *
## Properties
- **`columnValuesToBeNotNull`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeUnique
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeunique
---
# columnValuesToBeUnique
*This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. *
## Properties
- **`columnValuesToBeUnique`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestomatchregex
---
# columnValuesToMatchRegex
*This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. *
## Properties
- **`regex`** *(string)*: The regular expression the column entries should match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToNotMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestonotmatchregex
---
# columnValuesToNotMatchRegex
*This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. *
## Properties
- **`forbiddenRegex`** *(string)*: The regular expression the column entries should not match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/column
---
# column
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/table
---
# table
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttobebetween
---
# TableColumnCountToBeBetween
*This schema defines the test TableColumnCountToBeBetween. Test the number of columns to be between min max value.*
## Properties
- **`minColValue`** *(integer)*: Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column.
- **`maxColValue`** *(integer)*: Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttoequal
---
# TableColumnCountToEqual
*This schema defines the test TableColumnCountToEqual. Test the number of columns equal to a value.*
## Properties
- **`columnCount`** *(integer)*: Expected number of columns to equal to a {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnNameToExist
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumnnametoexist
---
# TableColumnNameToExist
*This schema defines the test TableColumnNameToExist. Test the table columns exists in the table.*
## Properties
- **`columnName`** *(string)*: Expected column of the table to exist.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnToMatchSet
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumntomatchset
---
# TableColumnToMatchSet
*This schema defines the test TableColumnToMatchSet. Test the table columns match a set of values. Unordered by default.*
## Properties
- **`columnNames`** *(string)*: Expected columns of the table to match the ones in {columnValuesSet}.
- **`ordered`** *(boolean)*: Whether or not to consider the order of the list when performing the match. Default: `False`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableCustomSQLQuery
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecustomsqlquery
---
# tableCustomSQLQuery
*This schema defines the test TableCustomSQLQuery. Test if a custom SQL returns 1 or 0 row.*
## Properties
- **`sqlExpression`** *(string)*: SQL expression to run against the table.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableRowCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttobebetween
---
# TableRowCountToBeBetween
*This schema defines the test TableRowCountToBeBetween. Test the number of rows to be between two values.*
## Properties
- **`minValue`** *(integer)*: Expected number of rows should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: Expected number of rows should be lower than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableRowCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttoequal
---
# TableRowCountToEqual
*This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.*
## Properties
- **`value`** *(integer)*: Expected number of rows {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -661,9 +661,11 @@ Validate values form a set are present in a column.
**Behavior**
| Condition | Status |
| ----------- | ----------- |
|1 or more values from `allowedValues` is found in the column|Success ✅|
|0 value from `allowedValues` is found in the column|Failed ❌|
|------------------------------------------------------------------------------------------|-----------|
| `matchEnum` is `false` and 1 or more values from `allowedValues` is found in the column | Success ✅ |
| `matchEnum` is `true` and every value in the column is found in `allowedValues`         | Success ✅ |
| `matchEnum` is `false` and no value from `allowedValues` is found in the column         | Failed ❌ |
| `matchEnum` is `true` and 1 or more values in the column is not in `allowedValues`      | Failed ❌ |
**YAML Config**
@ -674,7 +676,33 @@ Validate values form a set are present in a column.
computePassedFailedRowCount: <true or false>
parameterValues:
- name: allowedValues
value: ["forbidden1", "forbidden2"]
value: '["forbidden1", "forbidden2"]'
- name: matchEnum
value: "" # or true
```
**JSON Config**
```json
{
"name": "myTestName",
"description": "test description",
"columnName": "columnName",
"testDefinitionName": "columnValuesToBeInSet",
"parameterValues": [
{
"name": "allowedValues",
"value": [
"forbidden1",
"forbidden2"
]
},
{
"name": "matchEnum",
"value": ""
}
]
}
```
**JSON Config**

View File

@ -13,6 +13,13 @@
"description": "An Array of values.",
"dataType": "ARRAY",
"required": true
},
{
"name": "matchEnum",
"displayName": "Match enum",
"description": "If enabled, validate that each value independently matches the enum.",
"dataType": "BOOLEAN",
"required": false
}
],
"supportsRowLevelPassedFailed": true,