mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-31 05:18:18 +00:00
413 lines
15 KiB
Python
413 lines
15 KiB
Python
"""
|
|
Tests for ExternalTag and UnityCatalogTag classes.
|
|
"""
|
|
|
|
from datahub.api.entities.external.external_tag import (
|
|
ExternalTag,
|
|
)
|
|
from datahub.api.entities.external.restricted_text import RestrictedText
|
|
from datahub.api.entities.external.unity_catalog_external_entites import (
|
|
UnityCatalogTag,
|
|
UnityCatalogTagKeyText,
|
|
UnityCatalogTagValueText,
|
|
)
|
|
from datahub.metadata.urns import TagUrn
|
|
|
|
|
|
class TestRestrictedText:
    """Exercise the generic RestrictedText behaviour."""

    def test_basic_functionality(self):
        """Spaces become underscores in the rendered text; the input is kept."""
        raw = "Hello World! This is a test."
        expected = "Hello_World!_This_is_a_test."

        restricted = RestrictedText(raw)

        assert str(restricted) == expected
        assert restricted.original == raw
        assert restricted.processed == expected

    def test_truncation(self):
        """A 60-character input renders as 50 characters ending in an ellipsis."""
        oversized = "A" * 60  # ten characters past the 50 asserted below

        restricted = RestrictedText(oversized)
        rendered = str(restricted)

        assert len(rendered) == 50
        assert rendered.endswith("...")
        # Truncation only affects the rendered form, never the stored input.
        assert restricted.original == oversized

    def test_custom_configuration(self):
        """Explicit _configure() settings drive replacement and truncation."""
        raw = "hello-world.test"
        overrides = {
            "max_length": 10,
            "replace_chars": {"-", "."},
            "replacement_char": "_",
            "truncation_suffix": "...",
        }

        restricted = RestrictedText(raw)
        restricted._configure(**overrides)

        # Seven kept characters plus the three-character suffix make ten.
        assert str(restricted) == "hello_w..."
        assert restricted.original == raw
|
|
|
|
|
|
class TestExternalTag:
    """Cover construction, URN conversion and rendering of ExternalTag."""

    def test_from_urn_with_value(self):
        """A key:value tag URN yields both a key and a value."""
        parsed = ExternalTag.from_urn("urn:li:tag:environment:production")

        assert parsed.key.original == "environment"
        assert parsed.value is not None
        assert parsed.value.original == "production"
        assert str(parsed.key) == "environment"
        assert str(parsed.value) == "production"

    def test_from_urn_key_only(self):
        """A tag URN without a colon in its name yields a key and no value."""
        parsed = ExternalTag.from_urn("urn:li:tag:critical")

        assert parsed.key.original == "critical"
        assert parsed.value is None
        assert str(parsed.key) == "critical"

    def test_from_urn_multiple_colons(self):
        """Only the first colon of the tag name separates key from value."""
        parsed = ExternalTag.from_urn("urn:li:tag:database:mysql:version:8.0")

        assert parsed.key.original == "database"
        assert parsed.value is not None
        # Everything after the first colon stays together as the value.
        assert parsed.value.original == "mysql:version:8.0"

    def test_from_key_value(self):
        """from_key_value stores the given key and value as originals."""
        built = ExternalTag.from_key_value("team", "data-engineering")

        assert built.key.original == "team"
        assert built.value is not None
        assert built.value.original == "data-engineering"

    def test_from_key_only(self):
        """from_key_value with just a key leaves the value unset."""
        built = ExternalTag.from_key_value("critical")

        assert built.key.original == "critical"
        assert built.value is None

    def test_to_datahub_tag_urn_with_value(self):
        """A key/value tag renders to a urn:li:tag:key:value URN."""
        built = ExternalTag.from_key_value("environment", "production")

        rendered_urn = str(built.to_datahub_tag_urn())

        assert rendered_urn == "urn:li:tag:environment:production"

    def test_to_datahub_tag_urn_key_only(self):
        """A key-only tag renders to a urn:li:tag:key URN."""
        built = ExternalTag.from_key_value("critical")

        rendered_urn = str(built.to_datahub_tag_urn())

        assert rendered_urn == "urn:li:tag:critical"

    def test_string_representation(self):
        """str() gives key:value, or just the key when there is no value."""
        keyed_and_valued = ExternalTag.from_key_value("env", "prod")
        keyed_only = ExternalTag.from_key_value("critical")

        assert str(keyed_and_valued) == "env:prod"
        assert str(keyed_only) == "critical"

    def test_repr(self):
        """repr() mentions the class name together with the key and value."""
        rendered = repr(ExternalTag.from_key_value("env", "prod"))

        for fragment in ("ExternalTag", "env", "prod"):
            assert fragment in rendered

    def test_restricted_text_processing(self):
        """Rendered key/value are sanitized while the originals stay intact."""
        raw_key = "very long key with spaces and special chars!"
        raw_value = "very long value with \n newlines and \t tabs"

        built = ExternalTag.from_key_value(raw_key, raw_value)

        # Rendered forms must be free of the whitespace characters below.
        assert " " not in str(built.key)
        assert built.value is not None
        assert "\n" not in str(built.value)
        assert "\t" not in str(built.value)

        # The untouched input is still reachable through .original.
        assert " " in built.key.original
        assert "!" in built.key.original
        assert "\n" in built.value.original
        assert "\t" in built.value.original

    def test_get_datahub_tag_fallback(self):
        """to_datahub_tag_urn returns a TagUrn carrying the key:value name."""
        built = ExternalTag.from_key_value("environment", "production")

        produced = built.to_datahub_tag_urn()

        # The result is a TagUrn object, not a plain string.
        assert isinstance(produced, TagUrn)
        assert str(produced) == "urn:li:tag:environment:production"

    def test_parse_tag_name_static_method(self):
        """_parse_tag_name splits a tag name on its first colon only."""
        # Name containing a single colon.
        head, tail = ExternalTag._parse_tag_name("environment:production")
        assert head == "environment"
        assert tail == "production"

        # Name without any colon: there is no value part.
        head, tail = ExternalTag._parse_tag_name("critical")
        assert head == "critical"
        assert tail is None

        # Name with several colons: the remainder is kept whole.
        head, tail = ExternalTag._parse_tag_name("db:mysql:version:8.0")
        assert head == "db"
        assert tail == "mysql:version:8.0"
|
|
|
|
|
|
class TestUnityCatalogTagKeyText:
    """Checks for the key-specific text wrapper UnityCatalogTagKeyText."""

    def test_key_restrictions(self):
        """Slashes are removed from the processed key; the raw key is kept."""
        raw_key = "data-source/type@main!"

        key_text = UnityCatalogTagKeyText(raw_key)
        rendered = str(key_text)

        assert "/" not in rendered
        assert "=" not in rendered
        assert "@" in rendered  # "@" is expected to pass through untouched

        # The unprocessed input remains available.
        assert key_text.original == raw_key

    def test_key_length_limit(self):
        """A 260-character key renders to at most 255 characters."""
        oversized = "a" * 260  # five characters past the 255 asserted below

        key_text = UnityCatalogTagKeyText(oversized)

        assert len(str(key_text)) <= 255
        assert key_text.original == oversized

    def test_valid_key_characters(self):
        """A key made of letters, digits and underscores is left unchanged."""
        already_valid = "environment_prod_v1_2"

        key_text = UnityCatalogTagKeyText(already_valid)

        # Nothing in this key needs replacing, so it renders verbatim.
        assert str(key_text) == already_valid
|
|
|
|
|
|
class TestUnityCatalogTagValueText:
    """Checks for the value-specific text wrapper UnityCatalogTagValueText."""

    def test_value_restrictions(self):
        """Newlines are dropped from the processed value but kept in original."""
        raw_value = "MySQL Database: 8.0 (Primary)\nProduction Instance"

        value_text = UnityCatalogTagValueText(raw_value)

        # The rendered form must not contain the newline...
        assert "\n" not in str(value_text)
        # ...while the stored input still does.
        assert "\n" in value_text.original

    def test_value_length_limit(self):
        """A 1010-character value renders to at most 1000, ending in '...'."""
        oversized = "a" * 1010  # ten characters past the 1000 asserted below

        value_text = UnityCatalogTagValueText(oversized)
        rendered = str(value_text)

        assert len(rendered) <= 1000
        assert rendered.endswith("...")
        assert value_text.original == oversized

    def test_permissive_characters(self):
        """Punctuation such as ':', '(', '@' and '#' survives in values."""
        punctuated = "MySQL: 8.0 (Primary) - Special chars: @#$%^&*"

        rendered = str(UnityCatalogTagValueText(punctuated))

        # Each of these characters must still be present after processing.
        for symbol in (":", "(", "@", "#"):
            assert symbol in rendered
|
|
|
|
|
|
class TestUnityCatalogTag:
    """Tests for UnityCatalogTag class."""

    def test_from_key_value(self):
        """from_key_value stores the given key and value as originals."""
        tag = UnityCatalogTag.from_key_value("environment", "production")
        assert tag.key.original == "environment"
        assert tag.value is not None
        assert tag.value.original == "production"

    def test_from_dict(self):
        """from_dict reads the "key" and "value" entries of a dictionary."""
        tag_dict = {"key": "team/owner", "value": "data-engineering@company.com"}
        tag = UnityCatalogTag.from_dict(tag_dict)

        assert tag.key.original == "team/owner"
        assert tag.value is not None
        assert tag.value.original == "data-engineering@company.com"

    def test_to_dict(self):
        """to_dict returns a dictionary with the tag's key and value."""
        tag = UnityCatalogTag.from_key_value("environment", "production")
        result = tag.to_dict()

        expected = {"key": "environment", "value": "production"}
        assert result == expected

    def test_to_display_dict(self):
        """to_display_dict exposes the processed key rather than the raw one."""
        tag = UnityCatalogTag.from_key_value("data-source/type!", "MySQL: 8.0")
        result = tag.to_display_dict()

        # The display form carries the processed key.
        assert result["key"] != "data-source/type!"  # Differs from the raw key
        assert "/" not in result["key"]  # "/" is replaced
        assert "!" in result["key"]  # "!" is kept as-is

    def test_key_only_tag(self):
        """A key-only tag has no value and serializes without a "value" entry."""
        tag = UnityCatalogTag.from_key_value("critical")
        assert tag.key.original == "critical"
        assert tag.value is None

        result = tag.to_dict()
        expected = {"key": "critical"}
        assert result == expected

    def test_truncation_detection(self):
        """Over-long keys/values are shortened; short ones pass unchanged."""
        # Key one character past the 255 asserted below.
        long_key = "a" * 256
        tag1 = UnityCatalogTag.from_key_value(long_key, "short_value")
        # Compare the rendered string, not the wrapper object: an object is
        # never equal to a plain str unless it defines that comparison, so
        # the wrapper-level inequality could hold even without truncation.
        assert str(tag1.key) != long_key
        assert len(str(tag1.key)) == 255

        # Value one character past the 1000 asserted below.
        long_value = "b" * 1001
        tag2 = UnityCatalogTag.from_key_value("short_key", long_value)
        assert tag2.value is not None
        assert str(tag2.value) != long_value
        assert len(str(tag2.value)) == 1000

        # No truncation
        tag3 = UnityCatalogTag.from_key_value("short", "short")
        assert tag3.value is not None
        assert str(tag3.value) == "short"
        assert str(tag3.key) == "short"

    def test_string_representation(self):
        """str() gives key:value, or just the key when there is no value."""
        tag_with_value = UnityCatalogTag.from_key_value("env", "prod")
        tag_key_only = UnityCatalogTag.from_key_value("critical")

        assert str(tag_with_value) == "env:prod"
        assert str(tag_key_only) == "critical"

    def test_character_sanitization(self):
        """Keys lose "/" and values lose newlines/tabs when rendered."""
        # Key sanitization: "/" is replaced, "@" and "!" are kept.
        tag = UnityCatalogTag.from_key_value("data/source@main!", "value")
        processed_key = str(tag.key)
        assert "/" not in processed_key
        assert "@" in processed_key
        assert "!" in processed_key
        assert "_" in processed_key  # Replacement char

        # Value sanitization: control whitespace is removed.
        tag2 = UnityCatalogTag.from_key_value("key", "line1\nline2\tcolumn")
        assert tag2.value is not None
        processed_value = str(tag2.value)
        assert "\n" not in processed_value
        assert "\t" not in processed_value

    def test_api_compatibility(self):
        """A dict round-trips through from_dict/to_dict with originals intact."""
        # Simulate API response format
        api_data = {"key": "data-source/type", "value": "PostgreSQL DB"}
        tag = UnityCatalogTag.from_dict(api_data)

        # Should be able to convert back to API format
        api_output = tag.to_dict()
        assert api_output["key"] == "data-source/type"  # Original preserved
        assert api_output["value"] == "PostgreSQL DB"  # Original preserved

        # Display format should show processed values
        display_output = tag.to_display_dict()
        assert "/" not in display_output["key"]  # Should be sanitized
|
|
|
|
|
|
class TestIntegration:
    """Cross-class scenarios combining ExternalTag and UnityCatalogTag."""

    def test_external_tag_to_unity_catalog_conversion(self):
        """Originals carry over to a UnityCatalogTag; the rendered key differs."""
        source_tag = ExternalTag.from_key_value("data-source/type!", "MySQL: 8.0")

        # Feed the unprocessed key/value into the Unity Catalog flavour.
        source_value = source_tag.value
        raw_value = source_value.original if source_value is not None else None
        converted = UnityCatalogTag.from_key_value(source_tag.key.original, raw_value)

        # Both tags must agree on the original text.
        assert converted.key.original == source_tag.key.original
        if source_tag.value is not None and converted.value is not None:
            assert converted.value.original == source_tag.value.original

        # The two classes render this key differently.
        assert str(converted.key) != str(source_tag.key)

    def test_round_trip_urn_parsing(self):
        """Parsing a URN and rendering it again reproduces the same string."""
        starting_urn = "urn:li:tag:environment:production"

        rebuilt_urn = ExternalTag.from_urn(starting_urn).to_datahub_tag_urn()

        assert str(rebuilt_urn) == starting_urn

    def test_complex_tag_scenarios(self):
        """Several key/value shapes keep their originals in both tag classes."""
        scenarios = [
            # Plain key/value and key-only tags.
            ("environment", "production"),
            ("critical", None),
            # Punctuation-heavy keys and values.
            ("data-source", "postgresql://user:pass@host:5432/db"),
            ("team/project", "data-eng/analytics-v2"),
            # A value that itself contains colons.
            ("version", "app:v1.2.3:stable"),
            # A value built from ten repetitions of a sentence.
            ("description", "This is a very long description " * 10),
        ]

        for raw_key, raw_value in scenarios:
            # Direct construction keeps the inputs as originals.
            external = ExternalTag.from_key_value(raw_key, raw_value)
            assert external.key.original == raw_key
            if raw_value:
                assert external.value is not None
                assert external.value.original == raw_value

            # Rendering to a URN and parsing it back keeps them as well.
            reparsed = ExternalTag.from_urn(external.to_datahub_tag_urn())
            assert reparsed.key.original == raw_key
            if raw_value:
                assert reparsed.value is not None
                assert reparsed.value.original == raw_value

            # The Unity Catalog flavour preserves the same originals.
            unity = UnityCatalogTag.from_key_value(raw_key, raw_value)
            assert unity.key.original == raw_key
            if raw_value:
                assert unity.value is not None
                assert unity.value.original == raw_value
|