datahub/metadata-ingestion/tests/unit/test_sql_utils.py

80 lines
2.7 KiB
Python

import pytest
from datahub.configuration.common import AllowDenyPattern
from datahub.ingestion.source.sql.sql_utils import (
check_table_with_profile_pattern,
gen_schema_key,
)
def test_guid_generators() -> None:
    """Pin the GUID of the schema key built by ``gen_schema_key``.

    The key is generated for database ``hive`` / schema ``db1`` on the
    ``hive`` platform, with ``PROD`` as both platform instance and env, and
    its ``guid()`` is compared against a hard-coded value.
    """
    key = gen_schema_key(
        db_name="hive",
        schema="db1",
        platform="hive",
        platform_instance="PROD",
        env="PROD",
    )

    # Hard-coded reference value: the test fails if guid() ever returns
    # something else for these inputs.
    assert key.guid() == "f5e571e4a9acce86333e6b427ba1651f"
# Parametrize cases for test_profile_pattern_matching_on_table_allow_list.
# Each tuple is (allow_pattern, table_name, result), where ``result`` is the
# value check_table_with_profile_pattern is expected to return when
# ``allow_pattern`` is the only entry of the allow list.
# Every case is listed exactly once: a repeated tuple would only re-run an
# identical check.
test_profile_pattern_matching_on_table_allow_list_test_data = [
    # Cases expected to return True.
    ("db.table.column", "db.table", True),
    ("db.table.column2", "db.table", True),
    ("db.table..*", "db.table", True),
    ("db.*", "db.table", True),
    ("db.*", "db.schema.table", True),
    ("db.schema.*", "db.schema.table", True),
    ("db\\.schema\\..*", "db.schema.table", True),
    ("db\\.schema\\.table\\.column_prefix.*", "db.schema.table", True),
    ("db\\.schema\\.table\\.column", "db.schema.table", True),
    # Cases expected to return False.
    ("db\\.schema\\.table2\\.column", "db.schema.table", False),
    ("db2\\.schema.*", "db.schema.table", False),
    ("db\\.schema\\.table\\..*", "db.table2", False),
]
@pytest.mark.parametrize(
    "allow_pattern, table_name, result",
    test_profile_pattern_matching_on_table_allow_list_test_data,
)
def test_profile_pattern_matching_on_table_allow_list(
    allow_pattern: str, table_name: str, result: bool
) -> None:
    """Check one allow-list case against ``check_table_with_profile_pattern``.

    An ``AllowDenyPattern`` is built with ``allow_pattern`` as its only allow
    entry, and the value returned for ``table_name`` must equal ``result``.
    """
    profile_filter = AllowDenyPattern(allow=[allow_pattern])
    outcome = check_table_with_profile_pattern(profile_filter, table_name)
    assert outcome == result
# Parametrize cases for test_profile_pattern_matching_on_table_deny_list.
# Each tuple is (deny_pattern, table_name, result), where ``result`` is the
# value check_table_with_profile_pattern is expected to return when
# ``deny_pattern`` is the only entry of the deny list.
# Every case is listed exactly once: a repeated tuple would only re-run an
# identical check.
test_profile_pattern_matching_on_table_deny_list_test_data = [
    ("db.table.column", "db.table", True),
    ("db.table.column2", "db.table", True),
    ("db.table..*", "db.table", True),
    ("db.*", "db.table", False),
    ("db.*", "db.schema.table", False),
    ("db.schema.*", "db.schema.table", False),
    ("db\\.schema\\..*", "db.schema.table", False),
    ("db\\.schema\\.table\\.column_prefix.*", "db.schema.table", True),
    ("db\\.schema\\.table\\.column", "db.schema.table", True),
    ("db\\.schema\\.table2\\.column", "db.schema.table", True),
    ("db2\\.schema.*", "db.schema.table", True),
    ("db\\.schema\\.table\\..*", "db.table2", True),
]
@pytest.mark.parametrize(
    "deny_pattern, table_name, result",
    test_profile_pattern_matching_on_table_deny_list_test_data,
)
def test_profile_pattern_matching_on_table_deny_list(
    deny_pattern: str, table_name: str, result: bool
) -> None:
    """Check one deny-list case against ``check_table_with_profile_pattern``.

    An ``AllowDenyPattern`` is built with ``deny_pattern`` as its only deny
    entry, and the value returned for ``table_name`` must equal ``result``.
    """
    profile_filter = AllowDenyPattern(deny=[deny_pattern])
    outcome = check_table_with_profile_pattern(profile_filter, table_name)
    assert outcome == result