fix(ingest/api): External tag (UnityTag, LakeFormationTag) fixes (#14322)

Tamas Nemeth 2025-08-05 18:08:45 +01:00 committed by GitHub
parent 59384dfc9b
commit 8845cf27f8
9 changed files with 558 additions and 387 deletions

View File

@@ -47,12 +47,14 @@ class ExternalTag(BaseModel):
if key is not None:
# Direct initialization with key/value
processed_key = (
RestrictedText(key) if not isinstance(key, RestrictedText) else key
RestrictedText(raw_text=key)
if not isinstance(key, RestrictedText)
else key
)
processed_value = None
if value is not None:
processed_value = (
RestrictedText(value)
RestrictedText(raw_text=value)
if not isinstance(value, RestrictedText)
else value
)
@@ -95,9 +97,9 @@ class ExternalTag(BaseModel):
'urn:li:tag:key:value' if value exists, otherwise 'urn:li:tag:key'
"""
if self.value is not None:
tag_name = f"{self.key.original}:{self.value.original}"
tag_name = f"{self.key.raw_text}:{self.value.raw_text}"
else:
tag_name = self.key.original
tag_name = self.key.raw_text
return TagUrn(name=tag_name)
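
A minimal round-trip sketch of the updated API, based on the raw_text construction above and the from_key_value / from_urn / to_datahub_tag_urn helpers exercised in the test changes later in this commit:

from datahub.api.entities.external.external_tag import ExternalTag

# Plain strings are wrapped into RestrictedText(raw_text=...) internally.
tag = ExternalTag.from_key_value("environment", "production")
assert tag.key.raw_text == "environment"                 # original text preserved
assert tag.value is not None and tag.value.raw_text == "production"

# The DataHub tag URN joins key and value with ':' when a value is present.
urn = tag.to_datahub_tag_urn()                           # TagUrn(name="environment:production")
assert ExternalTag.from_urn(urn).key.raw_text == "environment"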

View File

@@ -10,8 +10,9 @@
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
# Tag search requires exact term matching.
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, Optional
from pydantic import validator
from typing_extensions import ClassVar
from datahub.api.entities.external.external_tag import ExternalTag
@@ -19,21 +20,21 @@ from datahub.api.entities.external.restricted_text import RestrictedText
class LakeFormationTagKeyText(RestrictedText):
"""RestrictedText configured for Unity Catalog tag keys."""
"""RestrictedText configured for Lake Formation tag keys."""
_default_max_length: ClassVar[int] = 50
# Unity Catalog tag keys: alphanumeric, hyphens, underscores, periods only
_default_replacement_char: ClassVar[str] = "_"
_default_truncation_suffix: ClassVar[str] = "" # No suffix for clean identifiers
DEFAULT_MAX_LENGTH: ClassVar[int] = 50
# Lake Formation tag keys restrictions
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers
class LakeFormationTagValueText(RestrictedText):
"""RestrictedText configured for Unity Catalog tag values."""
"""RestrictedText configured for Lake Formation tag values."""
_default_max_length: ClassVar[int] = 50
# Unity Catalog tag values are more permissive but still have some restrictions
_default_replacement_char: ClassVar[str] = " "
_default_truncation_suffix: ClassVar[str] = "..."
DEFAULT_MAX_LENGTH: ClassVar[int] = 50
# Lake Formation tag values restrictions
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
class LakeFormationTag(ExternalTag):
@@ -49,43 +50,43 @@ class LakeFormationTag(ExternalTag):
value: Optional[LakeFormationTagValueText] = None
catalog: Optional[str] = None
def __init__(
self,
key: Optional[Union[str, LakeFormationTagKeyText]] = None,
value: Optional[Union[str, LakeFormationTagValueText]] = None,
**data: Any,
) -> None:
"""
Initialize LakeFormation Tag from either a DataHub Tag URN or explicit key/value.
# Pydantic v1 validators
@validator("key", pre=True)
@classmethod
def _validate_key(cls, v: Any) -> LakeFormationTagKeyText:
"""Validate and convert key field for Pydantic v1."""
if isinstance(v, LakeFormationTagKeyText):
return v
Args:
key: Explicit key value (optional for Pydantic initialization)
value: Explicit value (optional)
**data: Additional Pydantic data
"""
if key is not None:
# Direct initialization with key/value
processed_key = (
LakeFormationTagKeyText(key)
if not isinstance(key, LakeFormationTagKeyText)
else key
)
processed_value = None
if value is not None:
processed_value = (
LakeFormationTagValueText(value)
if not isinstance(value, LakeFormationTagValueText)
else value
)
# If we get a RestrictedText object from parent class validation, use its raw_text value
if hasattr(v, "raw_text"):
return LakeFormationTagKeyText(raw_text=v.raw_text)
super().__init__(
key=processed_key,
value=processed_value,
**data,
)
else:
# Standard pydantic initialization
super().__init__(**data)
return LakeFormationTagKeyText(raw_text=v)
@validator("value", pre=True)
@classmethod
def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]:
"""Validate and convert value field for Pydantic v1."""
if v is None:
return None
if isinstance(v, LakeFormationTagValueText):
return v
# If we get a RestrictedText object from parent class validation, use its raw_text value
if hasattr(v, "raw_text"):
text_value = v.raw_text
# If value is an empty string, set it to None to not generate empty value in DataHub tag
if not str(text_value):
return None
return LakeFormationTagValueText(raw_text=text_value)
# If value is an empty string, set it to None to not generate empty value in DataHub tag
if not str(v):
return None
return LakeFormationTagValueText(raw_text=v)
def __eq__(self, other: object) -> bool:
"""Check equality based on key and value."""
@@ -137,9 +138,9 @@ class LakeFormationTag(ExternalTag):
Returns:
Dictionary with 'key' and optionally 'value'
"""
result: Dict[str, str] = {"key": self.key.original}
result: Dict[str, str] = {"key": self.key.raw_text}
if self.value is not None:
result["value"] = self.value.original
result["value"] = self.value.raw_text
return result
def to_display_dict(self) -> Dict[str, str]:
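
A short sketch of how the new pre-validators behave, mirroring the LakeFormationTag tests added in this commit (plain strings are coerced via raw_text, and an empty value collapses to None so no dangling "key:" tag is produced):

from datahub.api.entities.external.lake_formation_external_entites import (
    LakeFormationTag,
    LakeFormationTagKeyText,
)

# Plain strings pass through the @validator(..., pre=True) hooks above.
tag = LakeFormationTag(key="team owner", value="data engineering")
assert isinstance(tag.key, LakeFormationTagKeyText)
assert tag.key.raw_text == "team owner"   # original kept
assert str(tag.key) == "team_owner"       # sanitized form: spaces -> "_"

# An empty value is normalized to None.
assert LakeFormationTag(key="critical", value="").value is None

# to_dict() round-trips the original (raw_text) values.
assert tag.to_dict() == {"key": "team owner", "value": "data engineering"}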

View File

@@ -11,41 +11,12 @@ Features:
from __future__ import annotations
from typing import Any, ClassVar, Optional, Set, Union
from typing import ClassVar, Optional, Set
# Check Pydantic version and import accordingly
try:
from pydantic import VERSION
PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2
except (ImportError, AttributeError):
# Fallback for older versions that don't have VERSION
PYDANTIC_V2 = False
if PYDANTIC_V2:
from pydantic import GetCoreSchemaHandler # type: ignore[attr-defined]
from pydantic_core import core_schema
else:
from pydantic.validators import str_validator
from datahub.configuration.common import ConfigModel
class RestrictedTextConfig:
"""Configuration class for RestrictedText."""
def __init__(
self,
max_length: Optional[int] = None,
forbidden_chars: Optional[Set[str]] = None,
replacement_char: Optional[str] = None,
truncation_suffix: Optional[str] = None,
):
self.max_length = max_length
self.forbidden_chars = forbidden_chars
self.replacement_char = replacement_char
self.truncation_suffix = truncation_suffix
class RestrictedText(str):
class RestrictedText(ConfigModel):
"""A string type that stores the original value but returns a truncated and sanitized version.
This type allows you to:
@@ -60,8 +31,9 @@ class RestrictedText(str):
# Basic usage with default settings
name: RestrictedText
# Custom max length and character replacement using Field
custom_field: RestrictedText = RestrictedText.with_config(
# Custom max length and character replacement
custom_field: RestrictedText = RestrictedText(
text="hello-world.test",
max_length=10,
forbidden_chars={' ', '-', '.'},
replacement_char='_'
@@ -73,175 +45,128 @@ class RestrictedText(str):
custom_field="hello-world.test"
)
print(model.name) # Truncated and sanitized version
print(model.name.original) # Original value
print(model.custom_field) # "hello_worl..."
# model.name returns truncated and sanitized version
# model.name.raw_text returns original value
# model.custom_field returns "hello_worl..."
```
"""
# Default configuration
_default_max_length: ClassVar[Optional[int]] = 50
_default_forbidden_chars: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
_default_replacement_char: ClassVar[str] = "_"
_default_truncation_suffix: ClassVar[str] = "..."
DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"}
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
def __new__(cls, value: str = "") -> "RestrictedText":
"""Create a new string instance."""
instance = str.__new__(cls, "") # We'll set the display value later
return instance
raw_text: str
max_length: Optional[int] = None
forbidden_chars: Optional[Set[str]] = None
replacement_char: Optional[str] = None
truncation_suffix: Optional[str] = None
_processed_value: Optional[str] = None
def __init__(self, value: str = ""):
"""Initialize the RestrictedText with a value."""
self.original: str = value
self.max_length = self._default_max_length
self.forbidden_chars = self._default_forbidden_chars
self.replacement_char = self._default_replacement_char
self.truncation_suffix = self._default_truncation_suffix
def __init__(self, **data):
super().__init__(**data)
self.validate_text()
# Process the value
self._processed_value = self._process_value(value)
@classmethod
def __get_validators__(cls):
yield cls.pydantic_accept_raw_text
yield cls.validate
yield cls.pydantic_validate_text
def _configure(
@classmethod
def pydantic_accept_raw_text(cls, v):
if isinstance(v, (RestrictedText, dict)):
return v
assert isinstance(v, str), "text must be a string"
return {"text": v}
@classmethod
def pydantic_validate_text(cls, v):
assert isinstance(v, RestrictedText)
assert v.validate_text()
return v
@classmethod
def validate(cls, v):
"""Validate and create a RestrictedText instance."""
if isinstance(v, RestrictedText):
return v
# This should be a dict at this point from pydantic_accept_raw_text
if isinstance(v, dict):
instance = cls(**v)
instance.validate_text()
return instance
raise ValueError(f"Unable to validate RestrictedText from {type(v)}")
def validate_text(self) -> bool:
"""Validate the text and apply restrictions."""
# Set defaults if not provided
max_length = (
self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH
)
forbidden_chars = (
self.forbidden_chars
if self.forbidden_chars is not None
else self.DEFAULT_FORBIDDEN_CHARS
)
replacement_char = (
self.replacement_char
if self.replacement_char is not None
else self.DEFAULT_REPLACEMENT_CHAR
)
truncation_suffix = (
self.truncation_suffix
if self.truncation_suffix is not None
else self.DEFAULT_TRUNCATION_SUFFIX
)
# Store processed value
self._processed_value = self._process_value(
self.raw_text,
max_length,
forbidden_chars,
replacement_char,
truncation_suffix,
)
return True
def _process_value(
self,
max_length: Optional[int] = None,
forbidden_chars: Optional[Set[str]] = None,
replacement_char: Optional[str] = None,
truncation_suffix: Optional[str] = None,
) -> "RestrictedText":
"""Configure this instance with custom settings."""
if max_length is not None:
self.max_length = max_length
if forbidden_chars is not None:
self.forbidden_chars = forbidden_chars
if replacement_char is not None:
self.replacement_char = replacement_char
if truncation_suffix is not None:
self.truncation_suffix = truncation_suffix
# Reprocess the value with new configuration
self._processed_value = self._process_value(self.original)
return self
def _process_value(self, value: str) -> str:
value: str,
max_length: Optional[int],
forbidden_chars: Set[str],
replacement_char: str,
truncation_suffix: str,
) -> str:
"""Process the value by replacing characters and truncating."""
# Replace specified characters
processed = value
for char in self.forbidden_chars:
processed = processed.replace(char, self.replacement_char)
for char in forbidden_chars:
processed = processed.replace(char, replacement_char)
# Truncate if necessary
if self.max_length is not None and len(processed) > self.max_length:
if len(self.truncation_suffix) >= self.max_length:
if max_length is not None and len(processed) > max_length:
if len(truncation_suffix) >= max_length:
# If suffix is too long, just truncate without suffix
processed = processed[: self.max_length]
processed = processed[:max_length]
else:
# Truncate and add suffix
truncate_length = self.max_length - len(self.truncation_suffix)
processed = processed[:truncate_length] + self.truncation_suffix
truncate_length = max_length - len(truncation_suffix)
processed = processed[:truncate_length] + truncation_suffix
return processed
def __str__(self) -> str:
"""Return the processed (truncated and sanitized) value."""
return self._processed_value
return self._processed_value or ""
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._processed_value!r})"
return f"{self.__class__.__name__}({self.raw_text!r})"
@property
def processed(self) -> str:
"""Get the processed (truncated and sanitized) value."""
return self._processed_value
@classmethod
def with_config(
cls,
max_length: Optional[int] = None,
forbidden_chars: Optional[Set[str]] = None,
replacement_char: Optional[str] = None,
truncation_suffix: Optional[str] = None,
) -> RestrictedTextConfig:
"""Create a configuration object for use as field default.
Args:
max_length: Maximum length of the processed string
forbidden_chars: Set of characters to replace
replacement_char: Character to use as replacement
truncation_suffix: Suffix to add when truncating
Returns:
A configuration object that can be used as field default
"""
return RestrictedTextConfig(
max_length=max_length,
forbidden_chars=forbidden_chars,
replacement_char=replacement_char,
truncation_suffix=truncation_suffix,
)
# Pydantic v2 methods
if PYDANTIC_V2:
@classmethod
def _validate(
cls,
__input_value: Union[str, "RestrictedText"],
_: core_schema.ValidationInfo,
) -> "RestrictedText":
"""Validate and create a RestrictedText instance."""
if isinstance(__input_value, RestrictedText):
return __input_value
return cls(__input_value)
@classmethod
def __get_pydantic_core_schema__(
cls, source: type[Any], handler: GetCoreSchemaHandler
) -> core_schema.CoreSchema:
"""Get the Pydantic core schema for this type."""
return core_schema.with_info_after_validator_function(
cls._validate,
core_schema.str_schema(),
field_name=cls.__name__,
)
# Pydantic v1 methods
else:
@classmethod
def __get_validators__(cls):
"""Pydantic v1 validator method."""
yield cls.validate
@classmethod
def validate(cls, v, field=None):
"""Validate and create a RestrictedText instance for Pydantic v1."""
if isinstance(v, RestrictedText):
return v
if not isinstance(v, str):
# Let pydantic handle the string validation
v = str_validator(v)
# Create instance
instance = cls(v)
# Check if there's a field default that contains configuration
if (
field
and hasattr(field, "default")
and isinstance(field.default, RestrictedTextConfig)
):
config = field.default
instance._configure(
max_length=config.max_length,
forbidden_chars=config.forbidden_chars,
replacement_char=config.replacement_char,
truncation_suffix=config.truncation_suffix,
)
return instance
@classmethod
def __modify_schema__(cls, field_schema):
"""Modify the JSON schema for Pydantic v1."""
field_schema.update(type="string", examples=["example string"])
return self._processed_value or ""
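
RestrictedText is now a plain Pydantic v1 ConfigModel keyed on raw_text, so per-field limits are passed at construction time instead of through with_config/_configure. A minimal sketch, mirroring the updated tests in this commit:

from datahub.api.entities.external.restricted_text import RestrictedText

# Defaults: max_length=50, whitespace replaced with "_", "..." appended on truncation.
text = RestrictedText(raw_text="Hello World! This is a test.")
assert str(text) == "Hello_World!_This_is_a_test."
assert text.raw_text == "Hello World! This is a test."   # original preserved
assert text.processed == str(text)

# Per-instance overrides are ordinary model fields.
short = RestrictedText(
    raw_text="hello-world.test",
    max_length=10,
    forbidden_chars={"-", "."},
    replacement_char="_",
    truncation_suffix="...",
)
assert str(short) == "hello_w..."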

View File

@@ -10,8 +10,10 @@
# Tag search using the workspace search UI is supported only for tables, views, and table columns.
# Tag search requires exact term matching.
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint
from typing import Any, Dict, Optional, Set, Union
from typing import Any, Dict, Optional, Set
# Import validator for Pydantic v1 (always needed since we removed conditional logic)
from pydantic import validator
from typing_extensions import ClassVar
from datahub.api.entities.external.external_tag import ExternalTag
@@ -21,9 +23,9 @@ from datahub.api.entities.external.restricted_text import RestrictedText
class UnityCatalogTagKeyText(RestrictedText):
"""RestrictedText configured for Unity Catalog tag keys."""
_default_max_length: ClassVar[int] = 255
# Unity Catalog tag keys: alphanumeric, hyphens, underscores, periods only
_default_forbidden_chars: ClassVar[Set[str]] = {
DEFAULT_MAX_LENGTH: ClassVar[int] = 255
# Unity Catalog tag keys: forbidden characters based on constraints
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {
"\t",
"\n",
"\r",
@@ -34,18 +36,18 @@ class UnityCatalogTagKeyText(RestrictedText):
"/",
":",
}
_default_replacement_char: ClassVar[str] = "_"
_default_truncation_suffix: ClassVar[str] = "" # No suffix for clean identifiers
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_"
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers
class UnityCatalogTagValueText(RestrictedText):
"""RestrictedText configured for Unity Catalog tag values."""
_default_max_length: ClassVar[int] = 1000
DEFAULT_MAX_LENGTH: ClassVar[int] = 1000
# Unity Catalog tag values are more permissive but still have some restrictions
_default_forbidden_chars: ClassVar[Set[str]] = {"\t", "\n", "\r"}
_default_replacement_char: ClassVar[str] = " "
_default_truncation_suffix: ClassVar[str] = "..."
DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {"\t", "\n", "\r"}
DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " "
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..."
class UnityCatalogTag(ExternalTag):
@@ -60,46 +62,43 @@ class UnityCatalogTag(ExternalTag):
key: UnityCatalogTagKeyText
value: Optional[UnityCatalogTagValueText] = None
def __init__(
self,
key: Optional[Union[str, UnityCatalogTagKeyText]] = None,
value: Optional[Union[str, UnityCatalogTagValueText]] = None,
**data: Any,
) -> None:
"""
Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value.
# Pydantic v1 validators
@validator("key", pre=True)
@classmethod
def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText:
"""Validate and convert key field for Pydantic v1."""
if isinstance(v, UnityCatalogTagKeyText):
return v
Args:
key: Explicit key value (optional for Pydantic initialization)
value: Explicit value (optional)
**data: Additional Pydantic data
"""
if key is not None:
# Direct initialization with key/value
processed_key = (
UnityCatalogTagKeyText(key)
if not isinstance(key, UnityCatalogTagKeyText)
else key
)
processed_value = None
if value is not None:
processed_value = (
UnityCatalogTagValueText(value)
if not isinstance(value, UnityCatalogTagValueText)
else value
)
# If value is an empty string, set it to None to not generate an empty value in the DataHub tag (which would result in "key:" tags)
if not str(value):
processed_value = None
# If we get a RestrictedText object from parent class validation, use its raw_text value
if hasattr(v, "raw_text"):
return UnityCatalogTagKeyText(raw_text=v.raw_text)
super().__init__(
key=processed_key,
value=processed_value,
**data,
)
else:
# Standard pydantic initialization
super().__init__(**data)
return UnityCatalogTagKeyText(raw_text=v)
@validator("value", pre=True)
@classmethod
def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]:
"""Validate and convert value field for Pydantic v1."""
if v is None:
return None
if isinstance(v, UnityCatalogTagValueText):
return v
# If we get a RestrictedText object from parent class validation, use its raw_text value
if hasattr(v, "raw_text"):
text_value = v.raw_text
# If value is an empty string, set it to None to not generate empty value in DataHub tag
if not str(text_value):
return None
return UnityCatalogTagValueText(raw_text=text_value)
# If value is an empty string, set it to None to not generate empty value in DataHub tag
if not str(v):
return None
return UnityCatalogTagValueText(raw_text=v)
def __eq__(self, other: object) -> bool:
"""Check equality based on key and value."""
@@ -124,7 +123,7 @@ class UnityCatalogTag(ExternalTag):
Returns:
UnityCatalogTag instance
"""
return cls(key=tag_dict["key"], value=tag_dict.get("value"))
return cls(**tag_dict)
@classmethod
def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag":
@@ -149,9 +148,9 @@ class UnityCatalogTag(ExternalTag):
Returns:
Dictionary with 'key' and optionally 'value'
"""
result: Dict[str, str] = {"key": self.key.original}
result: Dict[str, str] = {"key": self.key.raw_text}
if self.value is not None:
result["value"] = self.value.original
result["value"] = self.value.raw_text
return result
def to_display_dict(self) -> Dict[str, str]:
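
UnityCatalogTag follows the same validator pattern, and from_dict now simply forwards the dictionary to the constructor. A small sketch based on the tests in this commit:

from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag

# from_dict unpacks into the constructor, so the pre-validators do the coercion.
tag = UnityCatalogTag.from_dict(
    {"key": "team/owner", "value": "data-engineering@company.com"}
)
assert tag.key.raw_text == "team/owner"   # original kept
assert "/" not in str(tag.key)            # "/" is in the forbidden key characters

# to_dict() emits the original (raw_text) values again.
assert tag.to_dict() == {"key": "team/owner", "value": "data-engineering@company.com"}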

View File

@@ -395,7 +395,7 @@ class GlueSource(StatefulIngestionSourceBase):
t = LakeFormationTag(
key=tag_key,
value=tag_value,
catalog_id=catalog_id,
catalog=catalog_id,
)
tags.append(t)
return tags
@@ -438,7 +438,7 @@ class GlueSource(StatefulIngestionSourceBase):
t = LakeFormationTag(
key=tag_key,
value=tag_value,
catalog_id=catalog_id,
catalog=catalog_id,
)
tags.append(t)
return tags
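
The only change on the Glue side is the keyword: the model field is catalog, not catalog_id. A sketch of the tag construction in isolation, with illustrative values standing in for what the source reads from the Lake Formation API:

from datahub.api.entities.external.lake_formation_external_entites import LakeFormationTag

# Illustrative values; in the source they come from the Glue/Lake Formation response.
tag_key, tag_value, catalog_id = "Environment", "Production", "123412341234"

t = LakeFormationTag(
    key=tag_key,
    value=tag_value,
    catalog=catalog_id,  # renamed: was passed as catalog_id before this fix
)
assert str(t) == "Environment:Production"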

View File

@@ -88,8 +88,8 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
return existing_platform_resource
return LakeFormationTagPlatformResourceId(
tag_key=tag.key,
tag_value=tag.value if tag.value is not None else None,
tag_key=str(tag.key),
tag_value=str(tag.value) if tag.value is not None else None,
platform_instance=platform_instance,
exists_in_lake_formation=exists_in_lake_formation,
catalog=catalog,
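
The resource id now stores the stringified (sanitized) key and value rather than the RestrictedText objects themselves. A sketch of the difference, under the defaults defined earlier in this commit:

from datahub.api.entities.external.lake_formation_external_entites import LakeFormationTag

tag = LakeFormationTag(key="data source", value="prod\ndb")
# str(...) is what ends up in the platform resource id: sanitized/truncated.
assert str(tag.key) == "data_source"        # key: spaces -> "_"
assert tag.key.raw_text == "data source"    # original kept for display/round-trips
assert tag.value is not None and str(tag.value) == "prod db"  # value: control chars -> " "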

View File

@@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
)
if existing_platform_resource:
logger.info(
f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.original}: {existing_platform_resource}"
f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
)
return existing_platform_resource
return UnityCatalogTagPlatformResourceId(
tag_key=tag.key.original,
tag_value=tag.value.original if tag.value is not None else None,
tag_key=tag.key.raw_text,
tag_value=tag.value.raw_text if tag.value is not None else None,
platform_instance=platform_instance,
exists_in_unity_catalog=exists_in_unity_catalog,
persisted=False,

View File

@@ -5,6 +5,11 @@ Tests for ExternalTag and UnityCatalogTag classes.
from datahub.api.entities.external.external_tag import (
ExternalTag,
)
from datahub.api.entities.external.lake_formation_external_entites import (
LakeFormationTag,
LakeFormationTagKeyText,
LakeFormationTagValueText,
)
from datahub.api.entities.external.restricted_text import RestrictedText
from datahub.api.entities.external.unity_catalog_external_entites import (
UnityCatalogTag,
@@ -19,30 +24,30 @@ class TestRestrictedText:
def test_basic_functionality(self):
"""Test basic RestrictedText creation and processing."""
text = RestrictedText("Hello World! This is a test.")
text = RestrictedText(raw_text="Hello World! This is a test.")
assert str(text) == "Hello_World!_This_is_a_test."
assert text.original == "Hello World! This is a test."
assert text.raw_text == "Hello World! This is a test."
assert text.processed == "Hello_World!_This_is_a_test."
def test_truncation(self):
"""Test text truncation with default settings."""
long_text = "A" * 60 # Longer than default 50 chars
text = RestrictedText(long_text)
text = RestrictedText(raw_text=long_text)
assert len(str(text)) == 50
assert str(text).endswith("...")
assert text.original == long_text
assert text.raw_text == long_text
def test_custom_configuration(self):
"""Test RestrictedText with custom configuration."""
text = RestrictedText("hello-world.test")
text._configure(
text = RestrictedText(
raw_text="hello-world.test",
max_length=10,
forbidden_chars={"-", "."},
replacement_char="_",
truncation_suffix="...",
)
assert str(text) == "hello_w..."
assert text.original == "hello-world.test"
assert text.raw_text == "hello-world.test"
class TestExternalTag:
@@ -51,37 +56,37 @@ class TestExternalTag:
def test_from_urn_with_value(self):
"""Test creating ExternalTag from URN with key:value."""
tag = ExternalTag.from_urn("urn:li:tag:environment:production")
assert tag.key.original == "environment"
assert tag.key.raw_text == "environment"
assert tag.value is not None
assert tag.value.original == "production"
assert tag.value.raw_text == "production"
assert str(tag.key) == "environment"
assert str(tag.value) == "production"
def test_from_urn_key_only(self):
"""Test creating ExternalTag from URN with key only."""
tag = ExternalTag.from_urn("urn:li:tag:critical")
assert tag.key.original == "critical"
assert tag.key.raw_text == "critical"
assert tag.value is None
assert str(tag.key) == "critical"
def test_from_urn_multiple_colons(self):
"""Test URN parsing with multiple colons (only splits on first)."""
tag = ExternalTag.from_urn("urn:li:tag:database:mysql:version:8.0")
assert tag.key.original == "database"
assert tag.key.raw_text == "database"
assert tag.value is not None
assert tag.value.original == "mysql:version:8.0"
assert tag.value.raw_text == "mysql:version:8.0"
def test_from_key_value(self):
"""Test creating ExternalTag from explicit key/value."""
tag = ExternalTag.from_key_value("team", "data-engineering")
assert tag.key.original == "team"
assert tag.key.raw_text == "team"
assert tag.value is not None
assert tag.value.original == "data-engineering"
assert tag.value.raw_text == "data-engineering"
def test_from_key_only(self):
"""Test creating ExternalTag from key only."""
tag = ExternalTag.from_key_value("critical")
assert tag.key.original == "critical"
assert tag.key.raw_text == "critical"
assert tag.value is None
def test_to_datahub_tag_urn_with_value(self):
@@ -126,10 +131,10 @@ class TestExternalTag:
assert "\t" not in str(tag.value) # Tabs replaced
# But originals should be preserved
assert " " in tag.key.original
assert "!" in tag.key.original
assert "\n" in tag.value.original
assert "\t" in tag.value.original
assert " " in tag.key.raw_text
assert "!" in tag.key.raw_text
assert "\n" in tag.value.raw_text
assert "\t" in tag.value.raw_text
def test_get_datahub_tag_fallback(self):
"""Test get_datahub_tag fallback when DataHub is not available."""
@@ -158,12 +163,251 @@ class TestExternalTag:
assert value == "mysql:version:8.0"
class TestLakeFormationTagKeyText:
"""Tests for LakeFormationTagKeyText."""
def test_key_restrictions(self):
"""Test Lake Formation key restrictions."""
key_text = LakeFormationTagKeyText(raw_text="data-source with spaces!")
# Should replace spaces and other characters
processed = str(key_text)
assert " " not in processed
assert "_" in processed # Replacement character
# Should preserve original
assert key_text.raw_text == "data-source with spaces!"
def test_key_length_limit(self):
"""Test Lake Formation key length limit (50 chars)."""
long_key = "a" * 60 # Longer than 50 chars
key_text = LakeFormationTagKeyText(raw_text=long_key)
assert len(str(key_text)) <= 50
assert key_text.raw_text == long_key
def test_valid_key_characters(self):
"""Test that valid characters are preserved."""
valid_key = "environment_prod_v1_2"
key_text = LakeFormationTagKeyText(raw_text=valid_key)
# These should be preserved (valid characters)
assert str(key_text) == valid_key
def test_no_truncation_suffix(self):
"""Test that Lake Formation keys don't use truncation suffix."""
long_key = "a" * 60
key_text = LakeFormationTagKeyText(raw_text=long_key)
processed = str(key_text)
# Should not end with dots since DEFAULT_TRUNCATION_SUFFIX is ""
assert not processed.endswith("...")
assert len(processed) == 50
class TestLakeFormationTagValueText:
"""Tests for LakeFormationTagValueText."""
def test_value_restrictions(self):
"""Test Lake Formation value restrictions."""
value_text = LakeFormationTagValueText(
raw_text="Database Instance\nWith control chars\t"
)
# Should replace control characters with spaces
processed = str(value_text)
assert "\n" not in processed
assert "\t" not in processed
assert " " in processed # Replacement character
# Should preserve original
assert "\n" in value_text.raw_text
assert "\t" in value_text.raw_text
def test_value_length_limit(self):
"""Test Lake Formation value length limit (50 chars)."""
long_value = "a" * 60 # Longer than 50 chars
value_text = LakeFormationTagValueText(raw_text=long_value)
assert len(str(value_text)) <= 50
assert str(value_text).endswith("...")
assert value_text.raw_text == long_value
def test_permissive_characters(self):
"""Test that most characters are allowed in values."""
complex_value = "MySQL: 8.0 (Primary) - Special chars: @#$%^&*"
value_text = LakeFormationTagValueText(raw_text=complex_value)
# Most characters should be preserved (more permissive than keys)
processed = str(value_text)
assert ":" in processed
assert "(" in processed
assert "@" in processed
assert "#" in processed
class TestLakeFormationTag:
"""Tests for LakeFormationTag class."""
def test_from_key_value(self):
"""Test creating LakeFormationTag from key/value."""
tag = LakeFormationTag.from_key_value("environment", "production")
assert tag.key.raw_text == "environment"
assert tag.value is not None
assert tag.value.raw_text == "production"
def test_from_dict(self):
"""Test creating LakeFormationTag from dictionary."""
tag_dict = {"key": "team owner", "value": "data engineering"}
tag = LakeFormationTag.from_dict(tag_dict)
assert tag.key.raw_text == "team owner"
assert tag.value is not None
assert tag.value.raw_text == "data engineering"
def test_to_dict(self):
"""Test converting LakeFormationTag to dictionary."""
tag = LakeFormationTag.from_key_value("environment", "production")
result = tag.to_dict()
expected = {"key": "environment", "value": "production"}
assert result == expected
def test_to_display_dict(self):
"""Test converting LakeFormationTag to display dictionary."""
tag = LakeFormationTag.from_key_value("data source type", "MySQL: 8.0")
result = tag.to_display_dict()
# Should show processed values
assert result["key"] != "data source type" # Should be processed
assert " " not in result["key"] # Spaces replaced
assert "_" in result["key"] # Replacement character
def test_key_only_tag(self):
"""Test LakeFormationTag with key only."""
tag = LakeFormationTag.from_key_value("critical")
assert tag.key.raw_text == "critical"
assert tag.value is None
result = tag.to_dict()
expected = {"key": "critical"}
assert result == expected
def test_direct_initialization(self):
"""Test direct initialization with strings (uses validators)."""
tag = LakeFormationTag(key="environment", value="production")
assert tag.key.raw_text == "environment"
assert tag.value is not None
assert tag.value.raw_text == "production"
def test_direct_initialization_with_objects(self):
"""Test direct initialization with RestrictedText objects."""
key_obj = LakeFormationTagKeyText(raw_text="team")
value_obj = LakeFormationTagValueText(raw_text="engineering")
tag = LakeFormationTag(key=key_obj, value=value_obj)
assert tag.key.raw_text == "team"
assert tag.value is not None
assert tag.value.raw_text == "engineering"
def test_truncation_detection(self):
"""Test truncation detection properties."""
# Long key (over 50 chars)
long_key = "a" * 60
tag1 = LakeFormationTag.from_key_value(long_key, "short_value")
assert tag1.key.raw_text == long_key # Original preserved
assert len(str(tag1.key)) == 50 # Processed truncated
# Long value (over 50 chars)
long_value = "b" * 60
tag2 = LakeFormationTag.from_key_value("short_key", long_value)
assert tag2.value is not None
assert tag2.value.raw_text == long_value # Original preserved
assert len(str(tag2.value)) == 50 # Processed truncated
# No truncation
tag3 = LakeFormationTag.from_key_value("short", "short")
assert tag3.value is not None
assert str(tag3.value) == "short"
assert str(tag3.key) == "short"
def test_string_representation(self):
"""Test string representation of LakeFormationTag."""
tag_with_value = LakeFormationTag.from_key_value("env", "prod")
tag_key_only = LakeFormationTag.from_key_value("critical")
assert str(tag_with_value) == "env:prod"
assert str(tag_key_only) == "critical"
def test_character_sanitization(self):
"""Test that invalid characters are properly sanitized."""
# Test key sanitization (spaces replaced with underscores)
tag = LakeFormationTag.from_key_value("data source main", "value")
processed_key = str(tag.key)
assert " " not in processed_key
assert "_" in processed_key # Replacement char
# Test value sanitization (control chars replaced with spaces)
tag2 = LakeFormationTag.from_key_value("key", "line1\nline2\tcolumn")
assert tag2.value is not None
processed_value = str(tag2.value)
assert "\n" not in processed_value
assert "\t" not in processed_value
assert " " in processed_value # Replacement char
def test_api_compatibility(self):
"""Test compatibility with Lake Formation API format."""
# Simulate API response format
api_data = {"key": "data source type", "value": "PostgreSQL DB"}
tag = LakeFormationTag.from_dict(api_data)
# Should be able to convert back to API format
api_output = tag.to_dict()
assert api_output["key"] == "data source type" # Original preserved
assert api_output["value"] == "PostgreSQL DB" # Original preserved
# Display format should show processed values
display_output = tag.to_display_dict()
assert " " not in display_output["key"] # Should be sanitized
def test_empty_value_handling(self):
"""Test that empty values are handled correctly."""
# Empty string value should become None
tag = LakeFormationTag.from_key_value("key", "")
assert tag.value is None
result = tag.to_dict()
assert "value" not in result or result.get("value") is None
def test_equality_and_hashing(self):
"""Test equality and hashing of LakeFormationTag objects."""
tag1 = LakeFormationTag.from_key_value("environment", "production")
tag2 = LakeFormationTag.from_key_value("environment", "production")
tag3 = LakeFormationTag.from_key_value("environment", "staging")
# Same tags should be equal
assert tag1 == tag2
assert hash(tag1) == hash(tag2)
# Different tags should not be equal
assert tag1 != tag3
assert hash(tag1) != hash(tag3)
def test_repr(self):
"""Test repr representation of LakeFormationTag."""
tag = LakeFormationTag.from_key_value("env", "prod")
repr_str = repr(tag)
assert "LakeFormationTag" in repr_str
assert "env" in repr_str
assert "prod" in repr_str
class TestUnityCatalogTagKeyText:
"""Tests for UnityCatalogTagKeyText."""
def test_key_restrictions(self):
"""Test Unity Catalog key restrictions."""
key_text = UnityCatalogTagKeyText("data-source/type@main!")
key_text = UnityCatalogTagKeyText(raw_text="data-source/type@main!")
# Should replace invalid characters
processed = str(key_text)
@@ -172,20 +416,20 @@ class TestUnityCatalogTagKeyText:
assert "@" in processed # Replacement character
# Should preserve original
assert key_text.original == "data-source/type@main!"
assert key_text.raw_text == "data-source/type@main!"
def test_key_length_limit(self):
"""Test Unity Catalog key length limit (127 chars)."""
long_key = "a" * 260 # Longer than 127 chars
key_text = UnityCatalogTagKeyText(long_key)
key_text = UnityCatalogTagKeyText(raw_text=long_key)
assert len(str(key_text)) <= 255
assert key_text.original == long_key
assert key_text.raw_text == long_key
def test_valid_key_characters(self):
"""Test that valid characters are preserved."""
valid_key = "environment_prod_v1_2"
key_text = UnityCatalogTagKeyText(valid_key)
key_text = UnityCatalogTagKeyText(raw_text=valid_key)
# These should be preserved (valid UC characters)
assert str(key_text) == valid_key
@@ -197,7 +441,7 @@ class TestUnityCatalogTagValueText:
def test_value_restrictions(self):
"""Test Unity Catalog value restrictions."""
value_text = UnityCatalogTagValueText(
"MySQL Database: 8.0 (Primary)\nProduction Instance"
raw_text="MySQL Database: 8.0 (Primary)\nProduction Instance"
)
# Should replace control characters
@@ -205,21 +449,21 @@ class TestUnityCatalogTagValueText:
assert "\n" not in processed
# Should preserve original
assert "\n" in value_text.original
assert "\n" in value_text.raw_text
def test_value_length_limit(self):
"""Test Unity Catalog value length limit (1000 chars)."""
long_value = "a" * 1010 # Longer than 1000 chars
value_text = UnityCatalogTagValueText(long_value)
value_text = UnityCatalogTagValueText(raw_text=long_value)
assert len(str(value_text)) <= 1000
assert str(value_text).endswith("...")
assert value_text.original == long_value
assert value_text.raw_text == long_value
def test_permissive_characters(self):
"""Test that most characters are allowed in values."""
complex_value = "MySQL: 8.0 (Primary) - Special chars: @#$%^&*"
value_text = UnityCatalogTagValueText(complex_value)
value_text = UnityCatalogTagValueText(raw_text=complex_value)
# Most characters should be preserved (more permissive than keys)
processed = str(value_text)
@@ -235,18 +479,18 @@ class TestUnityCatalogTag:
def test_from_key_value(self):
"""Test creating UnityCatalogTag from key/value."""
tag = UnityCatalogTag.from_key_value("environment", "production")
assert tag.key.original == "environment"
assert tag.key.raw_text == "environment"
assert tag.value is not None
assert tag.value.original == "production"
assert tag.value.raw_text == "production"
def test_from_dict(self):
"""Test creating UnityCatalogTag from dictionary."""
tag_dict = {"key": "team/owner", "value": "data-engineering@company.com"}
tag = UnityCatalogTag.from_dict(tag_dict)
assert tag.key.original == "team/owner"
assert tag.key.raw_text == "team/owner"
assert tag.value is not None
assert tag.value.original == "data-engineering@company.com"
assert tag.value.raw_text == "data-engineering@company.com"
def test_to_dict(self):
"""Test converting UnityCatalogTag to dictionary."""
@@ -269,7 +513,7 @@ class TestUnityCatalogTag:
def test_key_only_tag(self):
"""Test UnityCatalogTag with key only."""
tag = UnityCatalogTag.from_key_value("critical")
assert tag.key.original == "critical"
assert tag.key.raw_text == "critical"
assert tag.value is None
result = tag.to_dict()
@@ -348,14 +592,14 @@ class TestIntegration:
# Convert to UnityCatalogTag
uc_tag = UnityCatalogTag.from_key_value(
external_tag.key.original,
external_tag.value.original if external_tag.value is not None else None,
external_tag.key.raw_text,
external_tag.value.raw_text if external_tag.value is not None else None,
)
# Should have same original values
assert uc_tag.key.original == external_tag.key.original
assert uc_tag.key.raw_text == external_tag.key.raw_text
if external_tag.value is not None and uc_tag.value is not None:
assert uc_tag.value.original == external_tag.value.original
assert uc_tag.value.raw_text == external_tag.value.raw_text
# But different processing rules
assert str(uc_tag.key) != str(external_tag.key) # Different sanitization
@@ -391,22 +635,22 @@ class TestIntegration:
for key, value in test_cases:
# Test ExternalTag
ext_tag = ExternalTag.from_key_value(key, value)
assert ext_tag.key.original == key
assert ext_tag.key.raw_text == key
if value:
assert ext_tag.value is not None
assert ext_tag.value.original == value
assert ext_tag.value.raw_text == value
# Test round-trip through URN
urn = ext_tag.to_datahub_tag_urn()
parsed_tag = ExternalTag.from_urn(urn)
assert parsed_tag.key.original == key
assert parsed_tag.key.raw_text == key
if value:
assert parsed_tag.value is not None
assert parsed_tag.value.original == value
assert parsed_tag.value.raw_text == value
# Test UnityCatalogTag
uc_tag = UnityCatalogTag.from_key_value(key, value)
assert uc_tag.key.original == key
assert uc_tag.key.raw_text == key
if value:
assert uc_tag.value is not None
assert uc_tag.value.original == value
assert uc_tag.value.raw_text == value

View File

@@ -1,18 +1,18 @@
[
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Production",
"primaryKey": "123412341234.Environment:Production",
"secondaryKeys": [
"urn:li:tag:Environment:Production"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -27,7 +27,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -43,7 +43,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -59,18 +59,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Test",
"primaryKey": "123412341234.Environment:Test",
"secondaryKeys": [
"urn:li:tag:Environment:Test"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -85,7 +85,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -101,7 +101,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -117,18 +117,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Owner:DataTeam",
"primaryKey": "123412341234.Owner:DataTeam",
"secondaryKeys": [
"urn:li:tag:Owner:DataTeam"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -143,7 +143,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -159,7 +159,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -294,18 +294,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Production",
"primaryKey": "123412341234.Environment:Production",
"secondaryKeys": [
"urn:li:tag:Environment:Production"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -320,7 +320,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -336,7 +336,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -352,18 +352,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Test",
"primaryKey": "123412341234.Environment:Test",
"secondaryKeys": [
"urn:li:tag:Environment:Test"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -378,7 +378,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -394,7 +394,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -410,18 +410,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Owner:DataTeam",
"primaryKey": "123412341234.Owner:DataTeam",
"secondaryKeys": [
"urn:li:tag:Owner:DataTeam"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -436,7 +436,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -452,7 +452,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -584,18 +584,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Production",
"primaryKey": "123412341234.Environment:Production",
"secondaryKeys": [
"urn:li:tag:Environment:Production"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -610,7 +610,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -626,7 +626,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136",
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -642,18 +642,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Environment:Test",
"primaryKey": "123412341234.Environment:Test",
"secondaryKeys": [
"urn:li:tag:Environment:Test"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -668,7 +668,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -684,7 +684,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d",
"entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -700,18 +700,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "Owner:DataTeam",
"primaryKey": "123412341234.Owner:DataTeam",
"secondaryKeys": [
"urn:li:tag:Owner:DataTeam"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -726,7 +726,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -742,7 +742,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4",
"entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -874,18 +874,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf",
"entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "DataClassification:Sensitive",
"primaryKey": "123412341234.DataClassification:Sensitive",
"secondaryKeys": [
"urn:li:tag:DataClassification:Sensitive"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:DataClassification:Sensitive\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:DataClassification:Sensitive\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"DataClassification\", \"tag_value\": \"Sensitive\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -900,7 +900,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf",
"entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -916,7 +916,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf",
"entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {
@@ -932,18 +932,18 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34",
"entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6",
"changeType": "UPSERT",
"aspectName": "platformResourceInfo",
"aspect": {
"json": {
"resourceType": "LakeFormationTagPlatformResource",
"primaryKey": "BusinessUnit:Finance",
"primaryKey": "123412341234.BusinessUnit:Finance",
"secondaryKeys": [
"urn:li:tag:BusinessUnit:Finance"
],
"value": {
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:BusinessUnit:Finance\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:BusinessUnit:Finance\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"BusinessUnit\", \"tag_value\": \"Finance\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}",
"contentType": "JSON",
"schemaType": "JSON",
"schemaRef": "LakeFormationTagPlatformResource"
@@ -958,7 +958,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34",
"entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6",
"changeType": "UPSERT",
"aspectName": "dataPlatformInstance",
"aspect": {
@@ -974,7 +974,7 @@
},
{
"entityType": "platformResource",
"entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34",
"entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6",
"changeType": "UPSERT",
"aspectName": "status",
"aspect": {