Fixes 16305: Added Test Case for Matching Enum (#16362)

* Added Test Case for Matching Enum

1. Implemented the test case using the `matchEnum` parameter.
2. Added integration tests.
3. Added migrations.

* fix tests

* fixed tests

* format

* fixed tests

* clear search cache before running ingestion

* format

* changed scope of aws fixture

* moved migrations to 1.5.0
This commit is contained in:
Imri Paran 2024-05-28 09:30:30 +02:00 committed by GitHub
parent d909a3141e
commit a4c516d2c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
63 changed files with 550 additions and 869 deletions

View File

@ -0,0 +1,11 @@
-- matchEnum Test Definition Parameter for columnValuesToBeInSet
-- Adds the "matchEnum" boolean parameter to the existing columnValuesToBeInSet
-- test definition. The adjacent single-quoted literals below are concatenated
-- by MySQL into one JSON document before JSON_MERGE_PRESERVE merges it in.
UPDATE test_definition
set json = JSON_MERGE_PRESERVE(
    json,
    '{"parameterDefinition": ['
    '{"name": "matchEnum", "displayName": "Match enum", "description": "If enabled, validate that each value independently matches the enum.", "dataType": "BOOLEAN", "required": false, "optionValues": []}'
    ']}'
)
WHERE name = 'columnValuesToBeInSet'
-- Idempotency guard: skip rows that already carry a second parameter
-- (i.e. matchEnum was added by a previous run of this migration).
AND JSON_LENGTH(json, '$.parameterDefinition') < 2;

View File

@ -0,0 +1,8 @@
-- matchEnum Test Definition Parameter for columnValuesToBeInSet
-- Appends the "matchEnum" boolean parameter to the parameterDefinition array
-- of the columnValuesToBeInSet test definition. The adjacent string literals
-- (separated by a newline) are concatenated by PostgreSQL into a single
-- literal, cast to jsonb, and appended to the existing array via ||.
UPDATE test_definition
SET json = jsonb_set(json, '{parameterDefinition}', json->'parameterDefinition' || '['
    '{"name": "matchEnum", "displayName": "Match enum", "description": "If enabled, validate that each value independently matches the enum.", "dataType": "BOOLEAN", "required": false, "optionValues": []}'
    ']'::jsonb
)
WHERE name = 'columnValuesToBeInSet'
-- Idempotency guard: only touch rows whose parameterDefinition does not yet
-- contain the second (matchEnum) entry.
AND JSONB_ARRAY_LENGTH(json->'parameterDefinition') < 2;

View File

@ -50,9 +50,21 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
literal_eval, literal_eval,
) )
match_enum = self.get_test_case_param_value(
self.test_case.parameterValues, # type: ignore
"matchEnum",
bool,
default=False,
)
try: try:
column: Union[SQALikeColumn, Column] = self._get_column_name() column: Union[SQALikeColumn, Column] = self._get_column_name()
res = self._run_results(Metrics.COUNT_IN_SET, column, values=allowed_values) res = self._run_results(Metrics.COUNT_IN_SET, column, values=allowed_values)
if match_enum:
count = self._run_results(
Metrics.ROW_COUNT, column, values=allowed_values
)
res = count - res
except (ValueError, RuntimeError) as exc: except (ValueError, RuntimeError) as exc:
msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore msg = f"Error computing {self.test_case.fullyQualifiedName}: {exc}" # type: ignore
logger.debug(traceback.format_exc()) logger.debug(traceback.format_exc())
@ -71,7 +83,7 @@ class BaseColumnValuesToBeInSetValidator(BaseTestValidator):
return self.get_test_case_result_object( return self.get_test_case_result_object(
self.execution_date, self.execution_date,
self.get_test_case_status(res >= 1), self.get_test_case_status(res == 0 if match_enum else res >= 1),
f"Found countInSet={res}.", f"Found countInSet={res}.",
[TestResultValue(name=ALLOWED_VALUE_COUNT, value=str(res))], [TestResultValue(name=ALLOWED_VALUE_COUNT, value=str(res))],
row_count=row_count, row_count=row_count,

View File

@ -25,6 +25,9 @@ class LRUCache:
self._cache = OrderedDict() self._cache = OrderedDict()
self.capacity = capacity self.capacity = capacity
def clear(self):
    """Drop every cached entry, resetting the cache to an empty state.

    Capacity is unchanged; only the stored key/value pairs are discarded.
    """
    self._cache = OrderedDict()
def get(self, key): def get(self, key):
""" """
Returns the value associated to `key` if it exists, Returns the value associated to `key` if it exists,

View File

@ -0,0 +1,216 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Datalake ingestion integration tests"""
import os
from copy import deepcopy
import boto3
import pytest
from moto import mock_s3
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.workflow.data_quality import TestSuiteWorkflow
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
# Name of the mocked S3 bucket the datalake tests ingest from.
BUCKET_NAME = "MyBucket"

# Metadata-ingestion workflow config for the mocked datalake service.
# NOTE(review): awsRegion "us-weat-1" looks like a typo for "us-west-1";
# presumably harmless under moto's mock, but worth confirming.
# The jwtToken below is the well-known default OpenMetadata dev token used
# against a local server on port 8585 — not a real secret.
INGESTION_CONFIG = {
    "source": {
        "type": "datalake",
        "serviceName": "datalake_for_integration_tests",
        "serviceConnection": {
            "config": {
                "type": "Datalake",
                "configSource": {
                    "securityConfig": {
                        "awsAccessKeyId": "fake_access_key",
                        "awsSecretAccessKey": "fake_secret_key",
                        "awsRegion": "us-weat-1",
                    }
                },
                "bucketName": f"{BUCKET_NAME}",
            }
        },
        "sourceConfig": {"config": {"type": "DatabaseMetadata"}},
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {
                "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            },
        }
    },
}

# Test-suite workflow config: runs columnValuesToBeInSet against the ingested
# users.csv table twice — once without matchEnum (expected to pass) and once
# with matchEnum=True (expected to fail because not every row is 'John').
DATA_QUALITY_CONFIG = {
    "source": {
        "type": "datalake",
        "serviceName": "datalake_for_integration_tests",
        "serviceConnection": {
            "config": {
                "type": "Datalake",
                "configSource": {
                    "securityConfig": {
                        "awsAccessKeyId": "fake_access_key",
                        "awsSecretAccessKey": "fake_secret_key",
                        "awsRegion": "us-weat-1",
                    }
                },
                "bucketName": f"{BUCKET_NAME}",
            }
        },
        "sourceConfig": {
            "config": {
                "type": "TestSuite",
                "entityFullyQualifiedName": 'datalake_for_integration_tests.default.MyBucket."users.csv"',
            }
        },
    },
    "processor": {
        "type": "orm-test-runner",
        "config": {
            "testCases": [
                {
                    "name": "first_name_includes_john",
                    "testDefinitionName": "columnValuesToBeInSet",
                    "columnName": "first_name",
                    "parameterValues": [
                        {
                            "name": "allowedValues",
                            "value": "['John']",
                        }
                    ],
                },
                {
                    "name": "first_name_is_john",
                    "testDefinitionName": "columnValuesToBeInSet",
                    "columnName": "first_name",
                    "parameterValues": [
                        {
                            "name": "allowedValues",
                            "value": "['John']",
                        },
                        {
                            "name": "matchEnum",
                            "value": "True",
                        },
                    ],
                },
            ]
        },
    },
    "sink": {"type": "metadata-rest", "config": {}},
    "workflowConfig": {
        "loggerLevel": "DEBUG",
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {
                "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
            },
        },
    },
}
@pytest.fixture(scope="module", autouse=True)
def aws():
    """Module-wide moto S3 mock; yields a mocked boto3 S3 client."""
    with mock_s3():
        mocked_client = boto3.client("s3", region_name="us-east-1")
        yield mocked_client
@pytest.fixture(scope="class", autouse=True)
def setup_s3(request) -> None:
    """Create the mocked S3 bucket, upload every file under ./resources,
    and tear the bucket down after the test class finishes.

    Stores the client on ``request.cls.s3_client`` and the uploaded keys on
    ``request.cls.s3_keys`` so class-based tests can reach them.
    """
    # Mock our S3 bucket and ingest a file
    # Reset boto3's cached session so the moto mock (from the `aws` fixture)
    # is picked up instead of any real credentials.
    boto3.DEFAULT_SESSION = None
    # NOTE(review): region "us-weat-1" looks like a typo for "us-west-1"
    # (the resource below uses "us-west-1") — presumably harmless under moto,
    # but worth confirming.
    request.cls.s3_client = boto3.client(
        "s3",
        region_name="us-weat-1",
    )
    s3 = boto3.resource(
        "s3",
        region_name="us-west-1",
        aws_access_key_id="fake_access_key",
        aws_secret_access_key="fake_secret_key",
    )
    request.cls.s3_client.create_bucket(
        Bucket=BUCKET_NAME,
        CreateBucketConfiguration={"LocationConstraint": "us-west-1"},
    )
    # Sanity check: the bucket is reachable before we start uploading.
    s3.meta.client.head_bucket(Bucket=BUCKET_NAME)
    current_dir = os.path.dirname(__file__)
    resources_dir = os.path.join(current_dir, "resources")
    # Collect every file (recursively) under the resources directory.
    resources_paths = [
        os.path.join(path, filename)
        for path, _, files in os.walk(resources_dir)
        for filename in files
    ]
    request.cls.s3_keys = []
    for path in resources_paths:
        # Use the path relative to resources/ as the S3 object key so the
        # bucket mirrors the on-disk layout.
        key = os.path.relpath(path, resources_dir)
        request.cls.s3_keys.append(key)
        request.cls.s3_client.upload_file(Filename=path, Bucket=BUCKET_NAME, Key=key)
    yield
    # Teardown: a bucket must be emptied before it can be deleted.
    bucket = s3.Bucket(BUCKET_NAME)
    for key in bucket.objects.all():
        key.delete()
    bucket.delete()
@pytest.fixture(scope="class")
def run_ingestion(metadata):
    """Run the datalake metadata-ingestion workflow, then hard-delete the
    created database service once the dependent tests are done."""
    workflow = MetadataWorkflow.create(INGESTION_CONFIG)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()
    yield
    service = metadata.get_by_name(
        entity=DatabaseService, fqn="datalake_for_integration_tests"
    )
    metadata.delete(DatabaseService, service.id, recursive=True, hard_delete=True)
@pytest.fixture
def run_test_suite_workflow(run_ingestion):
    """Execute the data-quality test-suite workflow (after ingestion ran)."""
    workflow = TestSuiteWorkflow.create(DATA_QUALITY_CONFIG)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()
@pytest.fixture()
def run_profiler(run_ingestion):
    """Test profiler ingestion"""
    # Reuse the ingestion config, switching the source to profiler mode and
    # attaching the orm-profiler processor.
    config = deepcopy(INGESTION_CONFIG)
    config["source"]["sourceConfig"]["config"]["type"] = "Profiler"
    config["processor"] = {"type": "orm-profiler", "config": {}}
    workflow = ProfilerWorkflow.create(config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.stop()

View File

@ -0,0 +1,27 @@
from typing import List
import pytest
from metadata.generated.schema.tests.basic import TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCase
class TestDataQuality:
    """Assert the status of each test case produced by the DQ workflow."""

    @pytest.mark.parametrize(
        "test_case_name,expected_status",
        [
            ("first_name_includes_john", TestCaseStatus.Success),
            ("first_name_is_john", TestCaseStatus.Failed),
        ],
    )
    def test_data_quality(
        self, run_test_suite_workflow, metadata, test_case_name, expected_status
    ):
        all_cases: List[TestCase] = metadata.list_entities(
            TestCase, fields=["*"], skip_on_failure=True
        ).entities
        matched: TestCase = None
        for candidate in all_cases:
            if candidate.name.__root__ == test_case_name:
                matched = candidate
                break
        assert matched is not None
        assert matched.testCaseResult.testCaseStatus == expected_status

View File

@ -11,137 +11,35 @@
"""Datalake ingestion integration tests""" """Datalake ingestion integration tests"""
import os
from copy import deepcopy
from unittest import TestCase
import boto3
import botocore
import pytest import pytest
from moto import mock_s3
from metadata.generated.schema.entity.data.table import DataType, Table from metadata.generated.schema.entity.data.table import DataType, Table
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.ingestion.ometa.models import EntityList from metadata.ingestion.ometa.models import EntityList
from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
BUCKET_NAME = "MyBucket"
INGESTION_CONFIG = {
"source": {
"type": "datalake",
"serviceName": "datalake_for_integration_tests",
"serviceConnection": {
"config": {
"type": "Datalake",
"configSource": {
"securityConfig": {
"awsAccessKeyId": "fake_access_key",
"awsSecretAccessKey": "fake_secret_key",
"awsRegion": "us-weat-1",
}
},
"bucketName": f"{BUCKET_NAME}",
}
},
"sourceConfig": {"config": {"type": "DatabaseMetadata"}},
},
"sink": {"type": "metadata-rest", "config": {}},
"workflowConfig": {
"openMetadataServerConfig": {
"hostPort": "http://localhost:8585/api",
"authProvider": "openmetadata",
"securityConfig": {
"jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
},
}
},
}
@mock_s3 class TestDatalake:
class DatalakeTestE2E(TestCase):
"""datalake profiler E2E test""" """datalake profiler E2E test"""
@classmethod metadata: OpenMetadata = None
def setUpClass(cls) -> None: s3_client = None
server_config = OpenMetadataConnection(
hostPort="http://localhost:8585/api",
authProvider="openmetadata",
securityConfig=OpenMetadataJWTClientConfig(
jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
),
) # type: ignore
cls.metadata = OpenMetadata(server_config)
def setUp(self) -> None: @pytest.fixture(autouse=True)
# Mock our S3 bucket and ingest a file def set_metdata(self, metadata):
boto3.DEFAULT_SESSION = None self.metadata = metadata
self.client = boto3.client(
"s3",
region_name="us-weat-1",
)
# check that we are not running our test against a real bucket
try:
s3 = boto3.resource(
"s3",
region_name="us-west-1",
aws_access_key_id="fake_access_key",
aws_secret_access_key="fake_secret_key",
)
s3.meta.client.head_bucket(Bucket=BUCKET_NAME)
except botocore.exceptions.ClientError:
pass
else:
err = f"{BUCKET_NAME} should not exist."
raise EnvironmentError(err)
self.client.create_bucket(
Bucket=BUCKET_NAME,
CreateBucketConfiguration={"LocationConstraint": "us-west-1"},
)
current_dir = os.path.dirname(__file__)
resources_dir = os.path.join(current_dir, "resources")
resources_paths = [
os.path.join(path, filename)
for path, _, files in os.walk(resources_dir)
for filename in files
]
self.s3_keys = []
for path in resources_paths:
key = os.path.relpath(path, resources_dir)
self.s3_keys.append(key)
self.client.upload_file(Filename=path, Bucket=BUCKET_NAME, Key=key)
@pytest.mark.order(10000) @pytest.mark.order(10000)
def test_ingestion(self): def test_ingestion(self, run_ingestion):
"""test ingestion of datalake data""" """test ingestion of datalake data"""
# Ingest our S3 data # Ingest our S3 data
ingestion_workflow = MetadataWorkflow.create(INGESTION_CONFIG)
ingestion_workflow.execute()
ingestion_workflow.raise_from_status()
ingestion_workflow.stop()
resp: EntityList[Table] = self.metadata.list_entities( resp: EntityList[Table] = self.metadata.list_entities(
entity=Table, params={"database": "datalake_for_integration_tests.default"} entity=Table, params={"database": "datalake_for_integration_tests.default"}
) # type: ignore ) # type: ignore
entities = resp.entities entities = resp.entities
self.assertEqual(len(entities), 3) assert len(entities) == 3
names = [entity.name.__root__ for entity in entities] names = [entity.name.__root__ for entity in entities]
self.assertListEqual( assert sorted(["names.json", "new_users.parquet", "users.csv"]) == sorted(names)
sorted(["names.json", "new_users.parquet", "users.csv"]), sorted(names)
)
for entity in entities: for entity in entities:
columns = entity.columns columns = entity.columns
@ -149,27 +47,7 @@ class DatalakeTestE2E(TestCase):
if column.dataType == DataType.JSON: if column.dataType == DataType.JSON:
assert column.children assert column.children
@pytest.mark.order(10001) def test_profiler(self, run_profiler):
def test_profiler(self):
"""Test profiler ingestion"""
workflow_config = deepcopy(INGESTION_CONFIG)
workflow_config["source"]["sourceConfig"]["config"].update(
{
"type": "Profiler",
}
)
workflow_config["processor"] = {
"type": "orm-profiler",
"config": {},
}
profiler_workflow = ProfilerWorkflow.create(workflow_config)
profiler_workflow.execute()
status = profiler_workflow.result_status()
profiler_workflow.stop()
assert status == 0
csv_ = self.metadata.get_by_name( csv_ = self.metadata.get_by_name(
entity=Table, entity=Table,
fqn='datalake_for_integration_tests.default.MyBucket."users.csv"', fqn='datalake_for_integration_tests.default.MyBucket."users.csv"',

View File

@ -1,11 +1,55 @@
import contextlib
import logging
import os import os
import tarfile import tarfile
import zipfile import zipfile
from subprocess import CalledProcessError from subprocess import CalledProcessError
import docker
import pytest import pytest
from testcontainers.postgres import PostgresContainer from testcontainers.postgres import PostgresContainer
from metadata.generated.schema.api.services.createDatabaseService import (
CreateDatabaseServiceRequest,
)
from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
BasicAuth,
)
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
)
from metadata.generated.schema.entity.services.databaseService import (
DatabaseConnection,
DatabaseService,
DatabaseServiceType,
)
from metadata.generated.schema.metadataIngestion.workflow import (
OpenMetadataWorkflowConfig,
Sink,
Source,
SourceConfig,
WorkflowConfig,
)
from metadata.ingestion.lineage.sql_lineage import search_cache
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.metadata import MetadataWorkflow
@pytest.fixture(autouse=True, scope="session")
def config_logging():
logging.getLogger("sqlfluff").setLevel(logging.CRITICAL)
@contextlib.contextmanager
def try_bind(container, container_port, host_port):
try:
with container.with_bind_ports(container_port, host_port) as container:
yield container
except docker.errors.APIError:
logging.warning("Port %s is already in use, trying another port", host_port)
with container.with_bind_ports(container_port, None) as container:
yield container
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def postgres_container(tmp_path_factory): def postgres_container(tmp_path_factory):
@ -25,7 +69,9 @@ def postgres_container(tmp_path_factory):
"pg_stat_statements.track=all", "pg_stat_statements.track=all",
] ]
with container as container: with try_bind(container, 5432, 5432) if not os.getenv(
"CI"
) else container as container:
docker_container = container.get_wrapped_container() docker_container = container.get_wrapped_container()
docker_container.exec_run(["mkdir", "/data"]) docker_container.exec_run(["mkdir", "/data"])
docker_container.put_archive( docker_container.put_archive(
@ -57,3 +103,47 @@ def postgres_container(tmp_path_factory):
returncode=res[0], cmd=res, output=res[1].decode("utf-8") returncode=res[0], cmd=res, output=res[1].decode("utf-8")
) )
yield container yield container
@pytest.fixture(scope="module")
def db_service(metadata, postgres_container):
service = CreateDatabaseServiceRequest(
name="docker_test_db",
serviceType=DatabaseServiceType.Postgres,
connection=DatabaseConnection(
config=PostgresConnection(
username=postgres_container.username,
authType=BasicAuth(password=postgres_container.password),
hostPort="localhost:"
+ postgres_container.get_exposed_port(postgres_container.port),
database="dvdrental",
)
),
)
service_entity = metadata.create_or_update(data=service)
service_entity.connection.config.authType.password = postgres_container.password
yield service_entity
metadata.delete(
DatabaseService, service_entity.id, recursive=True, hard_delete=True
)
@pytest.fixture(scope="module")
def ingest_metadata(db_service, metadata: OpenMetadata):
workflow_config = OpenMetadataWorkflowConfig(
source=Source(
type=db_service.connection.config.type.value.lower(),
serviceName=db_service.fullyQualifiedName.__root__,
serviceConnection=db_service.connection,
sourceConfig=SourceConfig(config={}),
),
sink=Sink(
type="metadata-rest",
config={},
),
workflowConfig=WorkflowConfig(openMetadataServerConfig=metadata.config),
)
metadata_ingestion = MetadataWorkflow.create(workflow_config)
search_cache.clear()
metadata_ingestion.execute()
metadata_ingestion.raise_from_status()

View File

@ -0,0 +1,126 @@
import sys
from typing import Collection, List, Tuple
import pytest
from metadata.data_quality.api.models import (
TestCaseDefinition,
TestCaseParameterValue,
TestSuiteProcessorConfig,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseService
from metadata.generated.schema.metadataIngestion.testSuitePipeline import (
TestSuiteConfigType,
TestSuitePipeline,
)
from metadata.generated.schema.metadataIngestion.workflow import (
LogLevels,
OpenMetadataWorkflowConfig,
Processor,
Sink,
Source,
SourceConfig,
WorkflowConfig,
)
from metadata.generated.schema.tests.basic import TestCaseStatus
from metadata.generated.schema.tests.testCase import TestCase
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.workflow.data_quality import TestSuiteWorkflow
# These tests rely on 3.9+-only behavior; skip the whole module otherwise.
if sys.version_info < (3, 9):
    pytest.skip("requires python 3.9+", allow_module_level=True)
def parameteres_from_tuples(
    tup: Collection[Tuple[str, str]]
) -> List[TestCaseParameterValue]:
    """Build ``TestCaseParameterValue`` objects from (name, value) pairs."""
    values = []
    for pair in tup:
        values.append(TestCaseParameterValue(name=pair[0], value=pair[1]))
    return values
@pytest.fixture(scope="module")
def run_data_quality_workflow(
    ingest_metadata, db_service: DatabaseService, metadata: OpenMetadata
):
    """Run a test-suite workflow exercising columnValuesToBeInSet with and
    without the matchEnum parameter against the customer table."""
    entity_fqn = (
        f"{db_service.fullyQualifiedName.__root__}.dvdrental.public.customer"
    )
    test_cases = [
        TestCaseDefinition(
            name="first_name_includes_tom_and_jerry_wo_enum",
            testDefinitionName="columnValuesToBeInSet",
            columnName="first_name",
            parameterValues=parameteres_from_tuples(
                [
                    ("allowedValues", "['Tom', 'Jerry']"),
                ]
            ),
        ),
        TestCaseDefinition(
            name="first_name_includes_tom_and_jerry",
            testDefinitionName="columnValuesToBeInSet",
            columnName="first_name",
            parameterValues=parameteres_from_tuples(
                [
                    ("allowedValues", "['Tom', 'Jerry']"),
                    # NOTE(review): empty value — presumably treated as
                    # matchEnum disabled by the validator; confirm there.
                    ("matchEnum", ""),
                ]
            ),
        ),
        TestCaseDefinition(
            name="first_name_is_tom_or_jerry",
            testDefinitionName="columnValuesToBeInSet",
            columnName="first_name",
            parameterValues=parameteres_from_tuples(
                [
                    ("allowedValues", "['Tom', 'Jerry']"),
                    ("matchEnum", "True"),
                ]
            ),
        ),
    ]
    workflow_config = OpenMetadataWorkflowConfig(
        source=Source(
            type=TestSuiteConfigType.TestSuite.value,
            serviceName="MyTestSuite",
            sourceConfig=SourceConfig(
                config=TestSuitePipeline(
                    type=TestSuiteConfigType.TestSuite,
                    entityFullyQualifiedName=entity_fqn,
                )
            ),
            serviceConnection=db_service.connection,
        ),
        processor=Processor(
            type="orm-test-runner",
            config=TestSuiteProcessorConfig(testCases=test_cases),
        ),
        sink=Sink(
            type="metadata-rest",
            config={},
        ),
        workflowConfig=WorkflowConfig(
            loggerLevel=LogLevels.DEBUG, openMetadataServerConfig=metadata.config
        ),
    )
    test_suite_processor = TestSuiteWorkflow.create(workflow_config)
    test_suite_processor.execute()
    test_suite_processor.raise_from_status()
@pytest.mark.parametrize(
    "test_case_name,expected_status",
    [
        ("first_name_includes_tom_and_jerry_wo_enum", TestCaseStatus.Success),
        ("first_name_includes_tom_and_jerry", TestCaseStatus.Success),
        ("first_name_is_tom_or_jerry", TestCaseStatus.Failed),
    ],
)
def test_data_quality(
    run_data_quality_workflow, metadata: OpenMetadata, test_case_name, expected_status
):
    """Verify each ingested test case reached its expected result status."""
    entities: List[TestCase] = metadata.list_entities(
        TestCase, fields=["*"], skip_on_failure=True
    ).entities
    matches = [e for e in entities if e.name.__root__ == test_case_name]
    matched: TestCase = matches[0] if matches else None
    assert matched is not None
    assert matched.testCaseResult.testCaseStatus == expected_status

View File

@ -1,4 +1,3 @@
import logging
import sys import sys
import pytest import pytest
@ -36,6 +35,7 @@ from metadata.generated.schema.metadataIngestion.workflow import (
SourceConfig, SourceConfig,
WorkflowConfig, WorkflowConfig,
) )
from metadata.ingestion.lineage.sql_lineage import search_cache
from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.profiler.api.models import ProfilerProcessorConfig from metadata.profiler.api.models import ProfilerProcessorConfig
from metadata.workflow.metadata import MetadataWorkflow from metadata.workflow.metadata import MetadataWorkflow
@ -46,11 +46,6 @@ if not sys.version_info >= (3, 9):
pytest.skip("requires python 3.9+", allow_module_level=True) pytest.skip("requires python 3.9+", allow_module_level=True)
@pytest.fixture(autouse=True)
def config_logging():
logging.getLogger("sqlfluff").setLevel(logging.CRITICAL)
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def db_service(metadata, postgres_container): def db_service(metadata, postgres_container):
service = CreateDatabaseServiceRequest( service = CreateDatabaseServiceRequest(
@ -136,6 +131,7 @@ def run_profiler_workflow(ingest_metadata, db_service, metadata):
), ),
) )
metadata_ingestion = ProfilerWorkflow.create(workflow_config.dict()) metadata_ingestion = ProfilerWorkflow.create(workflow_config.dict())
search_cache.clear()
metadata_ingestion.execute() metadata_ingestion.execute()
return return
@ -171,6 +167,7 @@ def ingest_query_usage(ingest_metadata, db_service, metadata):
}, },
} }
workflow = UsageWorkflow.create(workflow_config) workflow = UsageWorkflow.create(workflow_config)
search_cache.clear()
workflow.execute() workflow.execute()
workflow.raise_from_status() workflow.raise_from_status()
return return

View File

@ -68,7 +68,7 @@ class TestSecretsManagerFactory(TestCase):
@patch.dict(os.environ, {"AZURE_KEY_VAULT_NAME": "test"}) @patch.dict(os.environ, {"AZURE_KEY_VAULT_NAME": "test"})
@patch("metadata.clients.aws_client.boto3") @patch("metadata.clients.aws_client.boto3")
def test_all_providers_has_implementation(self, mocked_boto3): def test_all_providers_has_implementation(self, mocked_boto3):
mocked_boto3.client.return_value = {} mocked_boto3.s3_client.return_value = {}
secret_manager_providers = [ secret_manager_providers = [
secret_manager_provider secret_manager_provider
for secret_manager_provider in SecretsManagerProvider for secret_manager_provider in SecretsManagerProvider

View File

@ -1,16 +0,0 @@
---
title: columnValueMaxToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemaxtobebetween
---
# columnValueMaxToBeBetween
*This schema defines the test ColumnValueMaxToBeBetween. Test the maximum value in a col is within a range.*
## Properties
- **`minValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be greater or equal than.
- **`maxValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMeanToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemeantobebetween
---
# columnValueMeanToBeBetween
*This schema defines the test ColumnValueMeanToBeBetween. Test the mean value in a col is within a range.*
## Properties
- **`minValueForMeanInCol`** *(integer)*: Expected mean value for the column to be greater or equal than.
- **`maxValueForMeanInCol`** *(integer)*: Expected mean value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMedianToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemediantobebetween
---
# columnValueMedianToBeBetween
*This schema defines the test ColumnValueMedianToBeBetween. Test the median value in a col is within a range.*
## Properties
- **`minValueForMedianInCol`** *(integer)*: Expected median value for the column to be greater or equal than.
- **`maxValueForMedianInCol`** *(integer)*: Expected median value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMinToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemintobebetween
---
# columnValueMinToBeBetween
*This schema defines the test ColumnValueMinToBeBetween. Test the minimum value in a col is within a range.*
## Properties
- **`minValueForMinInCol`** *(integer)*: Expected minimum value in the column to be greater or equal than.
- **`maxValueForMinInCol`** *(integer)*: Expect minimum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueStdDevToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestddevtobebetween
---
# columnValueStdDevToBeBetween
*This schema defines the test ColumnValueStdDevToBeBetween. Test the std. dev. value in a col is within a range.*
## Properties
- **`minValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be greater or equal than.
- **`maxValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesLengthsToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvalueslengthstobebetween
---
# columnValueLengthsToBeBetween
*This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. *
## Properties
- **`minLength`** *(integer)*: The {minLength} for the column length. If minLength is not included, maxLength is treated as upperBound and there will be no minimum number of rows.
- **`maxLength`** *(integer)*: The {maxLength} for the column length. If maxLength is not included, minLength is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,17 +0,0 @@
---
title: columnValuesMissingCountToBeEqual
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluesmissingcounttobeequal
---
# columnValuesMissingCount
*This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. *
## Properties
- **`missingCountValue`** *(integer)*: No.of missing values to be equal to.
- **`missingValueMatch`**: By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.
- **Items** *(string)*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesSumToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluessumtobebetween
---
# columnValuesSumToBeBetween
*This schema defines the test ColumnValuesSumToBeBetween. Test the sum of the values of a col is within a range.*
## Properties
- **`minValueForColSum`** *(integer)*: Expected sum of values in the column to be greater or equal than.
- **`maxValueForColSum`** *(integer)*: Expected sum values in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobebetween
---
# columnValuesToBeBetween
*This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. *
## Properties
- **`minValue`** *(integer)*: The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: The {maxValue} value for the column entry. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeinset
---
# ColumnValuesToBeInSet
*This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.*
## Properties
- **`allowedValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeNotInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotinset
---
# columnValuesToBeNotInSet
*This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. *
## Properties
- **`forbiddenValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeNotNull
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotnull
---
# columnValuesToBeNotNull
*This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. *
## Properties
- **`columnValuesToBeNotNull`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeUnique
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeunique
---
# columnValuesToBeUnique
*This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. *
## Properties
- **`columnValuesToBeUnique`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestomatchregex
---
# columnValuesToMatchRegex
*This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. *
## Properties
- **`regex`** *(string)*: The regular expression the column entries should match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToNotMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestonotmatchregex
---
# columnValuesToNotMatchRegex
*This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. *
## Properties
- **`forbiddenRegex`** *(string)*: The regular expression the column entries should not match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/column
---
# column
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/table
---
# table
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttobebetween
---
# TableColumnCountToBeBetween
*This schema defines the test TableColumnCountToBeBetween. Test the number of columns to be between min max value.*
## Properties
- **`minColValue`** *(integer)*: Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column.
- **`maxColValue`** *(integer)*: Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttoequal
---
# TableColumnCountToEqual
*This schema defines the test TableColumnCountToEqual. Test the number of columns equal to a value.*
## Properties
- **`columnCount`** *(integer)*: Expected number of columns to equal to a {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnNameToExist
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumnnametoexist
---
# TableColumnNameToExist
*This schema defines the test TableColumnNameToExist. Test the table columns exists in the table.*
## Properties
- **`columnName`** *(string)*: Expected column of the table to exist.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnToMatchSet
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumntomatchset
---
# TableColumnToMatchSet
*This schema defines the test TableColumnToMatchSet. Test the table columns match a set of values. Unordered by default.*
## Properties
- **`columnNames`** *(string)*: Expected columns of the table to match the ones in {columnValuesSet}.
- **`ordered`** *(boolean)*: Whether or not to consider the order of the list when performing the match. Default: `False`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableCustomSQLQuery
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecustomsqlquery
---
# tableCustomSQLQuery
*This schema defines the test TableCustomSQLQuery. Test if a custom SQL returns 1 or 0 row.*
## Properties
- **`sqlExpression`** *(string)*: SQL expression to run against the table.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableRowCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttobebetween
---
# TableRowCountToBeBetween
*This schema defines the test TableRowCountToBeBetween. Test the number of rows to between to two values.*
## Properties
- **`minValue`** *(integer)*: Expected number of rows should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: Expected number of rows should be lower than or equal to {maxValue}. if maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableRowCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttoequal
---
# TableRowCountToEqual
*This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.*
## Properties
- **`value`** *(integer)*: Expected number of rows {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMaxToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemaxtobebetween
---
# columnValueMaxToBeBetween
*This schema defines the test ColumnValueMaxToBeBetween. Test the maximum value in a col is within a range.*
## Properties
- **`minValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be greater or equal than.
- **`maxValueForMaxInCol`** *(integer)*: Expected maximum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMeanToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemeantobebetween
---
# columnValueMeanToBeBetween
*This schema defines the test ColumnValueMeanToBeBetween. Test the mean value in a col is within a range.*
## Properties
- **`minValueForMeanInCol`** *(integer)*: Expected mean value for the column to be greater or equal than.
- **`maxValueForMeanInCol`** *(integer)*: Expected mean value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMedianToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemediantobebetween
---
# columnValueMedianToBeBetween
*This schema defines the test ColumnValueMedianToBeBetween. Test the median value in a col is within a range.*
## Properties
- **`minValueForMedianInCol`** *(integer)*: Expected median value for the column to be greater or equal than.
- **`maxValueForMedianInCol`** *(integer)*: Expected median value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueMinToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluemintobebetween
---
# columnValueMinToBeBetween
*This schema defines the test ColumnValueMinToBeBetween. Test the minimum value in a col is within a range.*
## Properties
- **`minValueForMinInCol`** *(integer)*: Expected minimum value in the column to be greater or equal than.
- **`maxValueForMinInCol`** *(integer)*: Expected minimum value in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValueStdDevToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestddevtobebetween
---
# columnValueStdDevToBeBetween
*This schema defines the test ColumnValueStdDevToBeBetween. Test the std. dev. value in a col is within a range.*
## Properties
- **`minValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be greater or equal than.
- **`maxValueForStdDevInCol`** *(integer)*: Expected std. dev value for the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesLengthsToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvalueslengthstobebetween
---
# columnValueLengthsToBeBetween
*This schema defines the test ColumnValueLengthsToBeBetween. Test the value lengths in a column to be between minimum and maximum value. *
## Properties
- **`minLength`** *(integer)*: The {minLength} for the column length. If minLength is not included, maxLength is treated as upperBound and there will be no minimum number of rows.
- **`maxLength`** *(integer)*: The {maxLength} for the column length. if maxLength is not included, minLength is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,17 +0,0 @@
---
title: columnValuesMissingCountToBeEqual
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluesmissingcounttobeequal
---
# columnValuesMissingCount
*This schema defines the test ColumnValuesMissingCount. Test the column values missing count to be equal to given number. *
## Properties
- **`missingCountValue`** *(integer)*: No. of missing values to be equal to.
- **`missingValueMatch`**: By default match all null and empty values to be missing. This field allows us to configure additional strings such as N/A, NULL as missing strings as well.
- **Items** *(string)*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesSumToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluessumtobebetween
---
# columnValuesSumToBeBetween
*This schema defines the test ColumnValuesSumToBeBetween. Test the sum of the values of a col is within a range.*
## Properties
- **`minValueForColSum`** *(integer)*: Expected sum of values in the column to be greater or equal than.
- **`maxValueForColSum`** *(integer)*: Expected sum values in the column to be lower or equal than.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobebetween
---
# columnValuesToBeBetween
*This schema defines the test ColumnValuesToBeBetween. Test the values in a column to be between minimum and maximum value. *
## Properties
- **`minValue`** *(integer)*: The {minValue} value for the column entry. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: The {maxValue} value for the column entry. if maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeinset
---
# ColumnValuesToBeInSet
*This schema defines the test ColumnValuesToBeInSet. Test the column values are in the set.*
## Properties
- **`allowedValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: columnValuesToBeNotInSet
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotinset
---
# columnValuesToBeNotInSet
*This schema defines the test ColumnValuesToBeNotInSet. Test the column values to not be in the set. *
## Properties
- **`forbiddenValues`** *(array)*: An Array of values.
- **Items** *(['string', 'number'])*
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeNotNull
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobenotnull
---
# columnValuesToBeNotNull
*This schema defines the test ColumnValuesToBeNotNull. Test the number of values in a column are null. Values must be explicitly null. Empty strings don't count as null. *
## Properties
- **`columnValuesToBeNotNull`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToBeUnique
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestobeunique
---
# columnValuesToBeUnique
*This schema defines the test ColumnValuesToBeUnique. Test the values in a column to be unique. *
## Properties
- **`columnValuesToBeUnique`** *(boolean)*: Default: `True`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestomatchregex
---
# columnValuesToMatchRegex
*This schema defines the test ColumnValuesToMatchRegex. Test the values in a column to match a given regular expression. *
## Properties
- **`regex`** *(string)*: The regular expression the column entries should match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: columnValuesToNotMatchRegex
slug: /main-concepts/metadata-standard/schemas/tests/column/columnvaluestonotmatchregex
---
# columnValuesToNotMatchRegex
*This schema defines the test ColumnValuesToNotMatchRegex. Test the values in a column to not match a given regular expression. *
## Properties
- **`forbiddenRegex`** *(string)*: The regular expression the column entries should not match.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/column
---
# column
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,8 +0,0 @@
---
title: index
slug: /main-concepts/metadata-standard/schemas/tests/table
---
# table
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttobebetween
---
# TableColumnCountToBeBetween
*This schema defines the test TableColumnCountToBeBetween. Test the number of columns to be between min max value.*
## Properties
- **`minColValue`** *(integer)*: Expected number of columns should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of column.
- **`maxColValue`** *(integer)*: Expected number of columns should be less than or equal to {maxValue}. If maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of column.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumncounttoequal
---
# TableColumnCountToEqual
*This schema defines the test TableColumnCountToEqual. Test the number of columns equal to a value.*
## Properties
- **`columnCount`** *(integer)*: Expected number of columns to equal to a {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableColumnNameToExist
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumnnametoexist
---
# TableColumnNameToExist
*This schema defines the test TableColumnNameToExist. Test the table columns exists in the table.*
## Properties
- **`columnName`** *(string)*: Expected column of the table to exist.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableColumnToMatchSet
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecolumntomatchset
---
# TableColumnToMatchSet
*This schema defines the test TableColumnToMatchSet. Test the table columns match a set of values. Unordered by default.*
## Properties
- **`columnNames`** *(string)*: Expected columns of the table to match the ones in {columnValuesSet}.
- **`ordered`** *(boolean)*: Whether or not to consider the order of the list when performing the match. Default: `False`.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableCustomSQLQuery
slug: /main-concepts/metadata-standard/schemas/tests/table/tablecustomsqlquery
---
# tableCustomSQLQuery
*This schema defines the test TableCustomSQLQuery. Test if a custom SQL returns 1 or 0 row.*
## Properties
- **`sqlExpression`** *(string)*: SQL expression to run against the table.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,16 +0,0 @@
---
title: tableRowCountToBeBetween
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttobebetween
---
# TableRowCountToBeBetween
*This schema defines the test TableRowCountToBeBetween. Test the number of rows to between to two values.*
## Properties
- **`minValue`** *(integer)*: Expected number of rows should be greater than or equal to {minValue}. If minValue is not included, maxValue is treated as upperBound and there will be no minimum number of rows.
- **`maxValue`** *(integer)*: Expected number of rows should be lower than or equal to {maxValue}. if maxValue is not included, minValue is treated as lowerBound and there will be no maximum number of rows.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -1,15 +0,0 @@
---
title: tableRowCountToEqual
slug: /main-concepts/metadata-standard/schemas/tests/table/tablerowcounttoequal
---
# TableRowCountToEqual
*This schema defines the test TableRowCountToEqual. Test the number of rows equal to a value.*
## Properties
- **`value`** *(integer)*: Expected number of rows {value}.
Documentation file automatically generated at 2022-07-14 10:51:34.749986.

View File

@ -660,10 +660,12 @@ Validate values form a set are present in a column.
**Behavior** **Behavior**
| Condition | Status | | Condition | Status |
| ----------- | ----------- | |------------------------------------------------------------------------------------------|-----------|
|1 or more values from `allowedValues` is found in the column|Success ✅| | `matchEnum` is `false` and 1 or more values from `allowedValues` is found in the column | Success ✅ |
|0 value from `allowedValues` is found in the column|Failed ❌| | `matchEnum` is `true` and all columns have a value from `allowedValues` | Success ✅ |
| `matchEnum` is `false` and 0 value from `allowedValues` is found in the column             | Failed ❌  |
| `matchEnum` is `true` and 1 or more columns do not have a value from `allowedValues`       | Failed ❌  |
**YAML Config** **YAML Config**
@ -673,8 +675,34 @@ Validate values form a set are present in a column.
columnName: columnName columnName: columnName
computePassedFailedRowCount: <true or false> computePassedFailedRowCount: <true or false>
parameterValues: parameterValues:
- name: allowedValues - name: allowedValues
value: ["forbidden1", "forbidden2"] value: '["forbidden1", "forbidden2"]'
- name: matchEnum
value: "" # or true
```
**JSON Config**
```json
{
"name": "myTestName",
"description": "test description",
"columnName": "columnName",
"testDefinitionName": "columnValuesToBeInSet",
"parameterValues": [
{
"name": "allowedValues",
"value": [
"forbidden1",
"forbidden2"
]
},
{
"name": "matchEnum",
"value": ""
}
]
}
``` ```
**JSON Config** **JSON Config**

View File

@ -13,6 +13,13 @@
"description": "An Array of values.", "description": "An Array of values.",
"dataType": "ARRAY", "dataType": "ARRAY",
"required": true "required": true
},
{
"name": "matchEnum",
"displayName": "Match enum",
"description": "If enabled, validate that each value independently matches the enum.",
"dataType": "BOOLEAN",
"required": false
} }
], ],
"supportsRowLevelPassedFailed": true, "supportsRowLevelPassedFailed": true,