diff --git a/metadata-ingestion/src/datahub/api/entities/external/external_tag.py b/metadata-ingestion/src/datahub/api/entities/external/external_tag.py index 9692e73171..6439ba518c 100644 --- a/metadata-ingestion/src/datahub/api/entities/external/external_tag.py +++ b/metadata-ingestion/src/datahub/api/entities/external/external_tag.py @@ -47,12 +47,14 @@ class ExternalTag(BaseModel): if key is not None: # Direct initialization with key/value processed_key = ( - RestrictedText(key) if not isinstance(key, RestrictedText) else key + RestrictedText(raw_text=key) + if not isinstance(key, RestrictedText) + else key ) processed_value = None if value is not None: processed_value = ( - RestrictedText(value) + RestrictedText(raw_text=value) if not isinstance(value, RestrictedText) else value ) @@ -95,9 +97,9 @@ class ExternalTag(BaseModel): 'urn:li:tag:key:value' if value exists, otherwise 'urn:li:tag:key' """ if self.value is not None: - tag_name = f"{self.key.original}:{self.value.original}" + tag_name = f"{self.key.raw_text}:{self.value.raw_text}" else: - tag_name = self.key.original + tag_name = self.key.raw_text return TagUrn(name=tag_name) diff --git a/metadata-ingestion/src/datahub/api/entities/external/lake_formation_external_entites.py b/metadata-ingestion/src/datahub/api/entities/external/lake_formation_external_entites.py index 307c3ad5a1..3c3469b5d8 100644 --- a/metadata-ingestion/src/datahub/api/entities/external/lake_formation_external_entites.py +++ b/metadata-ingestion/src/datahub/api/entities/external/lake_formation_external_entites.py @@ -10,8 +10,9 @@ # Tag search using the workspace search UI is supported only for tables, views, and table columns. # Tag search requires exact term matching. 
# https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional +from pydantic import validator from typing_extensions import ClassVar from datahub.api.entities.external.external_tag import ExternalTag @@ -19,21 +20,21 @@ from datahub.api.entities.external.restricted_text import RestrictedText class LakeFormationTagKeyText(RestrictedText): - """RestrictedText configured for Unity Catalog tag keys.""" + """RestrictedText configured for Lake Formation tag keys.""" - _default_max_length: ClassVar[int] = 50 - # Unity Catalog tag keys: alphanumeric, hyphens, underscores, periods only - _default_replacement_char: ClassVar[str] = "_" - _default_truncation_suffix: ClassVar[str] = "" # No suffix for clean identifiers + DEFAULT_MAX_LENGTH: ClassVar[int] = 50 + # Lake Formation tag keys restrictions + DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_" + DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers class LakeFormationTagValueText(RestrictedText): - """RestrictedText configured for Unity Catalog tag values.""" + """RestrictedText configured for Lake Formation tag values.""" - _default_max_length: ClassVar[int] = 50 - # Unity Catalog tag values are more permissive but still have some restrictions - _default_replacement_char: ClassVar[str] = " " - _default_truncation_suffix: ClassVar[str] = "..." + DEFAULT_MAX_LENGTH: ClassVar[int] = 50 + # Lake Formation tag values restrictions + DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " " + DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..." 
class LakeFormationTag(ExternalTag): @@ -49,43 +50,43 @@ class LakeFormationTag(ExternalTag): value: Optional[LakeFormationTagValueText] = None catalog: Optional[str] = None - def __init__( - self, - key: Optional[Union[str, LakeFormationTagKeyText]] = None, - value: Optional[Union[str, LakeFormationTagValueText]] = None, - **data: Any, - ) -> None: - """ - Initialize LakeFormation Tag from either a DataHub Tag URN or explicit key/value. + # Pydantic v1 validators + @validator("key", pre=True) + @classmethod + def _validate_key(cls, v: Any) -> LakeFormationTagKeyText: + """Validate and convert key field for Pydantic v1.""" + if isinstance(v, LakeFormationTagKeyText): + return v - Args: - key: Explicit key value (optional for Pydantic initialization) - value: Explicit value (optional) - **data: Additional Pydantic data - """ - if key is not None: - # Direct initialization with key/value - processed_key = ( - LakeFormationTagKeyText(key) - if not isinstance(key, LakeFormationTagKeyText) - else key - ) - processed_value = None - if value is not None: - processed_value = ( - LakeFormationTagValueText(value) - if not isinstance(value, LakeFormationTagValueText) - else value - ) + # If we get a RestrictedText object from parent class validation, use its raw_text value + if hasattr(v, "raw_text"): + return LakeFormationTagKeyText(raw_text=v.raw_text) - super().__init__( - key=processed_key, - value=processed_value, - **data, - ) - else: - # Standard pydantic initialization - super().__init__(**data) + return LakeFormationTagKeyText(raw_text=v) + + @validator("value", pre=True) + @classmethod + def _validate_value(cls, v: Any) -> Optional[LakeFormationTagValueText]: + """Validate and convert value field for Pydantic v1.""" + if v is None: + return None + + if isinstance(v, LakeFormationTagValueText): + return v + + # If we get a RestrictedText object from parent class validation, use its raw_text value + if hasattr(v, "raw_text"): + text_value = v.raw_text + # If value is 
an empty string, set it to None to not generate empty value in DataHub tag + if not str(text_value): + return None + return LakeFormationTagValueText(raw_text=text_value) + + # If value is an empty string, set it to None to not generate empty value in DataHub tag + if not str(v): + return None + + return LakeFormationTagValueText(raw_text=v) def __eq__(self, other: object) -> bool: """Check equality based on key and value.""" @@ -137,9 +138,9 @@ class LakeFormationTag(ExternalTag): Returns: Dictionary with 'key' and optionally 'value' """ - result: Dict[str, str] = {"key": self.key.original} + result: Dict[str, str] = {"key": self.key.raw_text} if self.value is not None: - result["value"] = self.value.original + result["value"] = self.value.raw_text return result def to_display_dict(self) -> Dict[str, str]: diff --git a/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py b/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py index 99e5534c65..2a3973f43e 100644 --- a/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py +++ b/metadata-ingestion/src/datahub/api/entities/external/restricted_text.py @@ -11,41 +11,12 @@ Features: from __future__ import annotations -from typing import Any, ClassVar, Optional, Set, Union +from typing import ClassVar, Optional, Set -# Check Pydantic version and import accordingly -try: - from pydantic import VERSION - - PYDANTIC_V2 = int(VERSION.split(".")[0]) >= 2 -except (ImportError, AttributeError): - # Fallback for older versions that don't have VERSION - PYDANTIC_V2 = False - -if PYDANTIC_V2: - from pydantic import GetCoreSchemaHandler # type: ignore[attr-defined] - from pydantic_core import core_schema -else: - from pydantic.validators import str_validator +from datahub.configuration.common import ConfigModel -class RestrictedTextConfig: - """Configuration class for RestrictedText.""" - - def __init__( - self, - max_length: Optional[int] = None, - forbidden_chars: 
Optional[Set[str]] = None, - replacement_char: Optional[str] = None, - truncation_suffix: Optional[str] = None, - ): - self.max_length = max_length - self.forbidden_chars = forbidden_chars - self.replacement_char = replacement_char - self.truncation_suffix = truncation_suffix - - -class RestrictedText(str): +class RestrictedText(ConfigModel): """A string type that stores the original value but returns a truncated and sanitized version. This type allows you to: @@ -60,8 +31,9 @@ class RestrictedText(str): # Basic usage with default settings name: RestrictedText - # Custom max length and character replacement using Field - custom_field: RestrictedText = RestrictedText.with_config( + # Custom max length and character replacement + custom_field: RestrictedText = RestrictedText( + text="hello-world.test", max_length=10, forbidden_chars={' ', '-', '.'}, replacement_char='_' @@ -73,175 +45,128 @@ class RestrictedText(str): custom_field="hello-world.test" ) - print(model.name) # Truncated and sanitized version - print(model.name.original) # Original value - print(model.custom_field) # "hello_worl..." + # model.name returns truncated and sanitized version + # model.name.raw_text returns original value + # model.custom_field returns "hello_worl..." ``` """ # Default configuration - _default_max_length: ClassVar[Optional[int]] = 50 - _default_forbidden_chars: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"} - _default_replacement_char: ClassVar[str] = "_" - _default_truncation_suffix: ClassVar[str] = "..." + DEFAULT_MAX_LENGTH: ClassVar[Optional[int]] = 50 + DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {" ", "\t", "\n", "\r"} + DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_" + DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..." 
- def __new__(cls, value: str = "") -> "RestrictedText": - """Create a new string instance.""" - instance = str.__new__(cls, "") # We'll set the display value later - return instance + raw_text: str + max_length: Optional[int] = None + forbidden_chars: Optional[Set[str]] = None + replacement_char: Optional[str] = None + truncation_suffix: Optional[str] = None + _processed_value: Optional[str] = None - def __init__(self, value: str = ""): - """Initialize the RestrictedText with a value.""" - self.original: str = value - self.max_length = self._default_max_length - self.forbidden_chars = self._default_forbidden_chars - self.replacement_char = self._default_replacement_char - self.truncation_suffix = self._default_truncation_suffix + def __init__(self, **data): + super().__init__(**data) + self.validate_text() - # Process the value - self._processed_value = self._process_value(value) + @classmethod + def __get_validators__(cls): + yield cls.pydantic_accept_raw_text + yield cls.validate + yield cls.pydantic_validate_text - def _configure( + @classmethod + def pydantic_accept_raw_text(cls, v): + if isinstance(v, (RestrictedText, dict)): + return v + assert isinstance(v, str), "raw_text must be a string" + return {"raw_text": v} + + @classmethod + def pydantic_validate_text(cls, v): + assert isinstance(v, RestrictedText) + assert v.validate_text() + return v + + @classmethod + def validate(cls, v): + """Validate and create a RestrictedText instance.""" + if isinstance(v, RestrictedText): + return v + + # This should be a dict at this point from pydantic_accept_raw_text + if isinstance(v, dict): + instance = cls(**v) + instance.validate_text() + return instance + + raise ValueError(f"Unable to validate RestrictedText from {type(v)}") + + def validate_text(self) -> bool: + """Validate the text and apply restrictions.""" + # Set defaults if not provided + max_length = ( + self.max_length if self.max_length is not None else self.DEFAULT_MAX_LENGTH + ) + forbidden_chars = ( + 
self.forbidden_chars + if self.forbidden_chars is not None + else self.DEFAULT_FORBIDDEN_CHARS + ) + replacement_char = ( + self.replacement_char + if self.replacement_char is not None + else self.DEFAULT_REPLACEMENT_CHAR + ) + truncation_suffix = ( + self.truncation_suffix + if self.truncation_suffix is not None + else self.DEFAULT_TRUNCATION_SUFFIX + ) + + # Store processed value + self._processed_value = self._process_value( + self.raw_text, + max_length, + forbidden_chars, + replacement_char, + truncation_suffix, + ) + return True + + def _process_value( self, - max_length: Optional[int] = None, - forbidden_chars: Optional[Set[str]] = None, - replacement_char: Optional[str] = None, - truncation_suffix: Optional[str] = None, - ) -> "RestrictedText": - """Configure this instance with custom settings.""" - if max_length is not None: - self.max_length = max_length - if forbidden_chars is not None: - self.forbidden_chars = forbidden_chars - if replacement_char is not None: - self.replacement_char = replacement_char - if truncation_suffix is not None: - self.truncation_suffix = truncation_suffix - - # Reprocess the value with new configuration - self._processed_value = self._process_value(self.original) - return self - - def _process_value(self, value: str) -> str: + value: str, + max_length: Optional[int], + forbidden_chars: Set[str], + replacement_char: str, + truncation_suffix: str, + ) -> str: """Process the value by replacing characters and truncating.""" # Replace specified characters processed = value - for char in self.forbidden_chars: - processed = processed.replace(char, self.replacement_char) + for char in forbidden_chars: + processed = processed.replace(char, replacement_char) # Truncate if necessary - if self.max_length is not None and len(processed) > self.max_length: - if len(self.truncation_suffix) >= self.max_length: + if max_length is not None and len(processed) > max_length: + if len(truncation_suffix) >= max_length: # If suffix is too long, just 
truncate without suffix - processed = processed[: self.max_length] + processed = processed[:max_length] else: # Truncate and add suffix - truncate_length = self.max_length - len(self.truncation_suffix) - processed = processed[:truncate_length] + self.truncation_suffix + truncate_length = max_length - len(truncation_suffix) + processed = processed[:truncate_length] + truncation_suffix return processed def __str__(self) -> str: """Return the processed (truncated and sanitized) value.""" - return self._processed_value + return self._processed_value or "" def __repr__(self) -> str: - return f"{self.__class__.__name__}({self._processed_value!r})" + return f"{self.__class__.__name__}({self.raw_text!r})" @property def processed(self) -> str: """Get the processed (truncated and sanitized) value.""" - return self._processed_value - - @classmethod - def with_config( - cls, - max_length: Optional[int] = None, - forbidden_chars: Optional[Set[str]] = None, - replacement_char: Optional[str] = None, - truncation_suffix: Optional[str] = None, - ) -> RestrictedTextConfig: - """Create a configuration object for use as field default. 
- - Args: - max_length: Maximum length of the processed string - forbidden_chars: Set of characters to replace - replacement_char: Character to use as replacement - truncation_suffix: Suffix to add when truncating - - Returns: - A configuration object that can be used as field default - """ - return RestrictedTextConfig( - max_length=max_length, - forbidden_chars=forbidden_chars, - replacement_char=replacement_char, - truncation_suffix=truncation_suffix, - ) - - # Pydantic v2 methods - if PYDANTIC_V2: - - @classmethod - def _validate( - cls, - __input_value: Union[str, "RestrictedText"], - _: core_schema.ValidationInfo, - ) -> "RestrictedText": - """Validate and create a RestrictedText instance.""" - if isinstance(__input_value, RestrictedText): - return __input_value - return cls(__input_value) - - @classmethod - def __get_pydantic_core_schema__( - cls, source: type[Any], handler: GetCoreSchemaHandler - ) -> core_schema.CoreSchema: - """Get the Pydantic core schema for this type.""" - return core_schema.with_info_after_validator_function( - cls._validate, - core_schema.str_schema(), - field_name=cls.__name__, - ) - - # Pydantic v1 methods - else: - - @classmethod - def __get_validators__(cls): - """Pydantic v1 validator method.""" - yield cls.validate - - @classmethod - def validate(cls, v, field=None): - """Validate and create a RestrictedText instance for Pydantic v1.""" - if isinstance(v, RestrictedText): - return v - - if not isinstance(v, str): - # Let pydantic handle the string validation - v = str_validator(v) - - # Create instance - instance = cls(v) - - # Check if there's a field default that contains configuration - if ( - field - and hasattr(field, "default") - and isinstance(field.default, RestrictedTextConfig) - ): - config = field.default - instance._configure( - max_length=config.max_length, - forbidden_chars=config.forbidden_chars, - replacement_char=config.replacement_char, - truncation_suffix=config.truncation_suffix, - ) - - return instance - - 
@classmethod - def __modify_schema__(cls, field_schema): - """Modify the JSON schema for Pydantic v1.""" - field_schema.update(type="string", examples=["example string"]) + return self._processed_value or "" diff --git a/metadata-ingestion/src/datahub/api/entities/external/unity_catalog_external_entites.py b/metadata-ingestion/src/datahub/api/entities/external/unity_catalog_external_entites.py index 32d3ed368a..399c6e66b3 100644 --- a/metadata-ingestion/src/datahub/api/entities/external/unity_catalog_external_entites.py +++ b/metadata-ingestion/src/datahub/api/entities/external/unity_catalog_external_entites.py @@ -10,8 +10,10 @@ # Tag search using the workspace search UI is supported only for tables, views, and table columns. # Tag search requires exact term matching. # https://learn.microsoft.com/en-us/azure/databricks/database-objects/tags#constraint -from typing import Any, Dict, Optional, Set, Union +from typing import Any, Dict, Optional, Set +# Import validator for Pydantic v1 (always needed since we removed conditional logic) +from pydantic import validator from typing_extensions import ClassVar from datahub.api.entities.external.external_tag import ExternalTag @@ -21,9 +23,9 @@ from datahub.api.entities.external.restricted_text import RestrictedText class UnityCatalogTagKeyText(RestrictedText): """RestrictedText configured for Unity Catalog tag keys.""" - _default_max_length: ClassVar[int] = 255 - # Unity Catalog tag keys: alphanumeric, hyphens, underscores, periods only - _default_forbidden_chars: ClassVar[Set[str]] = { + DEFAULT_MAX_LENGTH: ClassVar[int] = 255 + # Unity Catalog tag keys: forbidden characters based on constraints + DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = { "\t", "\n", "\r", @@ -34,18 +36,18 @@ class UnityCatalogTagKeyText(RestrictedText): "/", ":", } - _default_replacement_char: ClassVar[str] = "_" - _default_truncation_suffix: ClassVar[str] = "" # No suffix for clean identifiers + DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = "_" + 
DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "" # No suffix for clean identifiers class UnityCatalogTagValueText(RestrictedText): """RestrictedText configured for Unity Catalog tag values.""" - _default_max_length: ClassVar[int] = 1000 + DEFAULT_MAX_LENGTH: ClassVar[int] = 1000 # Unity Catalog tag values are more permissive but still have some restrictions - _default_forbidden_chars: ClassVar[Set[str]] = {"\t", "\n", "\r"} - _default_replacement_char: ClassVar[str] = " " - _default_truncation_suffix: ClassVar[str] = "..." + DEFAULT_FORBIDDEN_CHARS: ClassVar[Set[str]] = {"\t", "\n", "\r"} + DEFAULT_REPLACEMENT_CHAR: ClassVar[str] = " " + DEFAULT_TRUNCATION_SUFFIX: ClassVar[str] = "..." class UnityCatalogTag(ExternalTag): @@ -60,46 +62,43 @@ class UnityCatalogTag(ExternalTag): key: UnityCatalogTagKeyText value: Optional[UnityCatalogTagValueText] = None - def __init__( - self, - key: Optional[Union[str, UnityCatalogTagKeyText]] = None, - value: Optional[Union[str, UnityCatalogTagValueText]] = None, - **data: Any, - ) -> None: - """ - Initialize UnityCatalogTag from either a DataHub Tag URN or explicit key/value. 
+ # Pydantic v1 validators + @validator("key", pre=True) + @classmethod + def _validate_key(cls, v: Any) -> UnityCatalogTagKeyText: + """Validate and convert key field for Pydantic v1.""" + if isinstance(v, UnityCatalogTagKeyText): + return v - Args: - key: Explicit key value (optional for Pydantic initialization) - value: Explicit value (optional) - **data: Additional Pydantic data - """ - if key is not None: - # Direct initialization with key/value - processed_key = ( - UnityCatalogTagKeyText(key) - if not isinstance(key, UnityCatalogTagKeyText) - else key - ) - processed_value = None - if value is not None: - processed_value = ( - UnityCatalogTagValueText(value) - if not isinstance(value, UnityCatalogTagValueText) - else value - ) - # If value is an empty string, set it to None to not generater empty value in DataHub tag which results in key: tags - if not str(value): - processed_value = None + # If we get a RestrictedText object from parent class validation, use its raw_text value + if hasattr(v, "raw_text"): + return UnityCatalogTagKeyText(raw_text=v.raw_text) - super().__init__( - key=processed_key, - value=processed_value, - **data, - ) - else: - # Standard pydantic initialization - super().__init__(**data) + return UnityCatalogTagKeyText(raw_text=v) + + @validator("value", pre=True) + @classmethod + def _validate_value(cls, v: Any) -> Optional[UnityCatalogTagValueText]: + """Validate and convert value field for Pydantic v1.""" + if v is None: + return None + + if isinstance(v, UnityCatalogTagValueText): + return v + + # If we get a RestrictedText object from parent class validation, use its raw_text value + if hasattr(v, "raw_text"): + text_value = v.raw_text + # If value is an empty string, set it to None to not generate empty value in DataHub tag + if not str(text_value): + return None + return UnityCatalogTagValueText(raw_text=text_value) + + # If value is an empty string, set it to None to not generate empty value in DataHub tag + if not str(v): + 
return None + + return UnityCatalogTagValueText(raw_text=v) def __eq__(self, other: object) -> bool: """Check equality based on key and value.""" @@ -124,7 +123,7 @@ class UnityCatalogTag(ExternalTag): Returns: UnityCatalogTag instance """ - return cls(key=tag_dict["key"], value=tag_dict.get("value")) + return cls(**tag_dict) @classmethod def from_key_value(cls, key: str, value: Optional[str] = None) -> "UnityCatalogTag": @@ -149,9 +148,9 @@ class UnityCatalogTag(ExternalTag): Returns: Dictionary with 'key' and optionally 'value' """ - result: Dict[str, str] = {"key": self.key.original} + result: Dict[str, str] = {"key": self.key.raw_text} if self.value is not None: - result["value"] = self.value.original + result["value"] = self.value.raw_text return result def to_display_dict(self) -> Dict[str, str]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 04a77908ef..cd13418525 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -395,7 +395,7 @@ class GlueSource(StatefulIngestionSourceBase): t = LakeFormationTag( key=tag_key, value=tag_value, - catalog_id=catalog_id, + catalog=catalog_id, ) tags.append(t) return tags @@ -438,7 +438,7 @@ class GlueSource(StatefulIngestionSourceBase): t = LakeFormationTag( key=tag_key, value=tag_value, - catalog_id=catalog_id, + catalog=catalog_id, ) tags.append(t) return tags diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py b/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py index b122388851..67812abb35 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/tag_entities.py @@ -88,8 +88,8 @@ class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId): return existing_platform_resource return 
LakeFormationTagPlatformResourceId( - tag_key=tag.key, - tag_value=tag.value if tag.value is not None else None, + tag_key=str(tag.key), + tag_value=str(tag.value) if tag.value is not None else None, platform_instance=platform_instance, exists_in_lake_formation=exists_in_lake_formation, catalog=catalog, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py b/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py index 992ba5a6d8..88d3abc28e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/tag_entities.py @@ -77,13 +77,13 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId): ) if existing_platform_resource: logger.info( - f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.original}: {existing_platform_resource}" + f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}" ) return existing_platform_resource return UnityCatalogTagPlatformResourceId( - tag_key=tag.key.original, - tag_value=tag.value.original if tag.value is not None else None, + tag_key=tag.key.raw_text, + tag_value=tag.value.raw_text if tag.value is not None else None, platform_instance=platform_instance, exists_in_unity_catalog=exists_in_unity_catalog, persisted=False, diff --git a/metadata-ingestion/tests/unit/api/entities/external/test_restrictions.py b/metadata-ingestion/tests/unit/api/entities/external/test_restrictions.py index 1d6dc78926..ae44cef22b 100644 --- a/metadata-ingestion/tests/unit/api/entities/external/test_restrictions.py +++ b/metadata-ingestion/tests/unit/api/entities/external/test_restrictions.py @@ -5,6 +5,11 @@ Tests for ExternalTag and UnityCatalogTag classes. 
from datahub.api.entities.external.external_tag import ( ExternalTag, ) +from datahub.api.entities.external.lake_formation_external_entites import ( + LakeFormationTag, + LakeFormationTagKeyText, + LakeFormationTagValueText, +) from datahub.api.entities.external.restricted_text import RestrictedText from datahub.api.entities.external.unity_catalog_external_entites import ( UnityCatalogTag, @@ -19,30 +24,30 @@ class TestRestrictedText: def test_basic_functionality(self): """Test basic RestrictedText creation and processing.""" - text = RestrictedText("Hello World! This is a test.") + text = RestrictedText(raw_text="Hello World! This is a test.") assert str(text) == "Hello_World!_This_is_a_test." - assert text.original == "Hello World! This is a test." + assert text.raw_text == "Hello World! This is a test." assert text.processed == "Hello_World!_This_is_a_test." def test_truncation(self): """Test text truncation with default settings.""" long_text = "A" * 60 # Longer than default 50 chars - text = RestrictedText(long_text) + text = RestrictedText(raw_text=long_text) assert len(str(text)) == 50 assert str(text).endswith("...") - assert text.original == long_text + assert text.raw_text == long_text def test_custom_configuration(self): """Test RestrictedText with custom configuration.""" - text = RestrictedText("hello-world.test") - text._configure( + text = RestrictedText( + raw_text="hello-world.test", max_length=10, forbidden_chars={"-", "."}, replacement_char="_", truncation_suffix="...", ) assert str(text) == "hello_w..." 
- assert text.original == "hello-world.test" + assert text.raw_text == "hello-world.test" class TestExternalTag: @@ -51,37 +56,37 @@ class TestExternalTag: def test_from_urn_with_value(self): """Test creating ExternalTag from URN with key:value.""" tag = ExternalTag.from_urn("urn:li:tag:environment:production") - assert tag.key.original == "environment" + assert tag.key.raw_text == "environment" assert tag.value is not None - assert tag.value.original == "production" + assert tag.value.raw_text == "production" assert str(tag.key) == "environment" assert str(tag.value) == "production" def test_from_urn_key_only(self): """Test creating ExternalTag from URN with key only.""" tag = ExternalTag.from_urn("urn:li:tag:critical") - assert tag.key.original == "critical" + assert tag.key.raw_text == "critical" assert tag.value is None assert str(tag.key) == "critical" def test_from_urn_multiple_colons(self): """Test URN parsing with multiple colons (only splits on first).""" tag = ExternalTag.from_urn("urn:li:tag:database:mysql:version:8.0") - assert tag.key.original == "database" + assert tag.key.raw_text == "database" assert tag.value is not None - assert tag.value.original == "mysql:version:8.0" + assert tag.value.raw_text == "mysql:version:8.0" def test_from_key_value(self): """Test creating ExternalTag from explicit key/value.""" tag = ExternalTag.from_key_value("team", "data-engineering") - assert tag.key.original == "team" + assert tag.key.raw_text == "team" assert tag.value is not None - assert tag.value.original == "data-engineering" + assert tag.value.raw_text == "data-engineering" def test_from_key_only(self): """Test creating ExternalTag from key only.""" tag = ExternalTag.from_key_value("critical") - assert tag.key.original == "critical" + assert tag.key.raw_text == "critical" assert tag.value is None def test_to_datahub_tag_urn_with_value(self): @@ -126,10 +131,10 @@ class TestExternalTag: assert "\t" not in str(tag.value) # Tabs replaced # But originals should 
be preserved - assert " " in tag.key.original - assert "!" in tag.key.original - assert "\n" in tag.value.original - assert "\t" in tag.value.original + assert " " in tag.key.raw_text + assert "!" in tag.key.raw_text + assert "\n" in tag.value.raw_text + assert "\t" in tag.value.raw_text def test_get_datahub_tag_fallback(self): """Test get_datahub_tag fallback when DataHub is not available.""" @@ -158,12 +163,251 @@ class TestExternalTag: assert value == "mysql:version:8.0" +class TestLakeFormationTagKeyText: + """Tests for LakeFormationTagKeyText.""" + + def test_key_restrictions(self): + """Test Lake Formation key restrictions.""" + key_text = LakeFormationTagKeyText(raw_text="data-source with spaces!") + + # Should replace spaces and other characters + processed = str(key_text) + assert " " not in processed + assert "_" in processed # Replacement character + + # Should preserve original + assert key_text.raw_text == "data-source with spaces!" + + def test_key_length_limit(self): + """Test Lake Formation key length limit (50 chars).""" + long_key = "a" * 60 # Longer than 50 chars + key_text = LakeFormationTagKeyText(raw_text=long_key) + + assert len(str(key_text)) <= 50 + assert key_text.raw_text == long_key + + def test_valid_key_characters(self): + """Test that valid characters are preserved.""" + valid_key = "environment_prod_v1_2" + key_text = LakeFormationTagKeyText(raw_text=valid_key) + + # These should be preserved (valid characters) + assert str(key_text) == valid_key + + def test_no_truncation_suffix(self): + """Test that Lake Formation keys don't use truncation suffix.""" + long_key = "a" * 60 + key_text = LakeFormationTagKeyText(raw_text=long_key) + processed = str(key_text) + + # Should not end with dots since DEFAULT_TRUNCATION_SUFFIX is "" + assert not processed.endswith("...") + assert len(processed) == 50 + + +class TestLakeFormationTagValueText: + """Tests for LakeFormationTagValueText.""" + + def test_value_restrictions(self): + """Test Lake 
Formation value restrictions.""" + value_text = LakeFormationTagValueText( + raw_text="Database Instance\nWith control chars\t" + ) + + # Should replace control characters with spaces + processed = str(value_text) + assert "\n" not in processed + assert "\t" not in processed + assert " " in processed # Replacement character + + # Should preserve original + assert "\n" in value_text.raw_text + assert "\t" in value_text.raw_text + + def test_value_length_limit(self): + """Test Lake Formation value length limit (50 chars).""" + long_value = "a" * 60 # Longer than 50 chars + value_text = LakeFormationTagValueText(raw_text=long_value) + + assert len(str(value_text)) <= 50 + assert str(value_text).endswith("...") + assert value_text.raw_text == long_value + + def test_permissive_characters(self): + """Test that most characters are allowed in values.""" + complex_value = "MySQL: 8.0 (Primary) - Special chars: @#$%^&*" + value_text = LakeFormationTagValueText(raw_text=complex_value) + + # Most characters should be preserved (more permissive than keys) + processed = str(value_text) + assert ":" in processed + assert "(" in processed + assert "@" in processed + assert "#" in processed + + +class TestLakeFormationTag: + """Tests for LakeFormationTag class.""" + + def test_from_key_value(self): + """Test creating LakeFormationTag from key/value.""" + tag = LakeFormationTag.from_key_value("environment", "production") + assert tag.key.raw_text == "environment" + assert tag.value is not None + assert tag.value.raw_text == "production" + + def test_from_dict(self): + """Test creating LakeFormationTag from dictionary.""" + tag_dict = {"key": "team owner", "value": "data engineering"} + tag = LakeFormationTag.from_dict(tag_dict) + + assert tag.key.raw_text == "team owner" + assert tag.value is not None + assert tag.value.raw_text == "data engineering" + + def test_to_dict(self): + """Test converting LakeFormationTag to dictionary.""" + tag = 
LakeFormationTag.from_key_value("environment", "production") + result = tag.to_dict() + + expected = {"key": "environment", "value": "production"} + assert result == expected + + def test_to_display_dict(self): + """Test converting LakeFormationTag to display dictionary.""" + tag = LakeFormationTag.from_key_value("data source type", "MySQL: 8.0") + result = tag.to_display_dict() + + # Should show processed values + assert result["key"] != "data source type" # Should be processed + assert " " not in result["key"] # Spaces replaced + assert "_" in result["key"] # Replacement character + + def test_key_only_tag(self): + """Test LakeFormationTag with key only.""" + tag = LakeFormationTag.from_key_value("critical") + assert tag.key.raw_text == "critical" + assert tag.value is None + + result = tag.to_dict() + expected = {"key": "critical"} + assert result == expected + + def test_direct_initialization(self): + """Test direct initialization with strings (uses validators).""" + tag = LakeFormationTag(key="environment", value="production") + assert tag.key.raw_text == "environment" + assert tag.value is not None + assert tag.value.raw_text == "production" + + def test_direct_initialization_with_objects(self): + """Test direct initialization with RestrictedText objects.""" + key_obj = LakeFormationTagKeyText(raw_text="team") + value_obj = LakeFormationTagValueText(raw_text="engineering") + tag = LakeFormationTag(key=key_obj, value=value_obj) + + assert tag.key.raw_text == "team" + assert tag.value is not None + assert tag.value.raw_text == "engineering" + + def test_truncation_detection(self): + """Test truncation detection properties.""" + # Long key (over 50 chars) + long_key = "a" * 60 + tag1 = LakeFormationTag.from_key_value(long_key, "short_value") + assert tag1.key.raw_text == long_key # Original preserved + assert len(str(tag1.key)) == 50 # Processed truncated + + # Long value (over 50 chars) + long_value = "b" * 60 + tag2 = 
LakeFormationTag.from_key_value("short_key", long_value) + assert tag2.value is not None + assert tag2.value.raw_text == long_value # Original preserved + assert len(str(tag2.value)) == 50 # Processed truncated + + # No truncation + tag3 = LakeFormationTag.from_key_value("short", "short") + assert tag3.value is not None + assert str(tag3.value) == "short" + assert str(tag3.key) == "short" + + def test_string_representation(self): + """Test string representation of LakeFormationTag.""" + tag_with_value = LakeFormationTag.from_key_value("env", "prod") + tag_key_only = LakeFormationTag.from_key_value("critical") + + assert str(tag_with_value) == "env:prod" + assert str(tag_key_only) == "critical" + + def test_character_sanitization(self): + """Test that invalid characters are properly sanitized.""" + # Test key sanitization (spaces replaced with underscores) + tag = LakeFormationTag.from_key_value("data source main", "value") + processed_key = str(tag.key) + assert " " not in processed_key + assert "_" in processed_key # Replacement char + + # Test value sanitization (control chars replaced with spaces) + tag2 = LakeFormationTag.from_key_value("key", "line1\nline2\tcolumn") + assert tag2.value is not None + processed_value = str(tag2.value) + assert "\n" not in processed_value + assert "\t" not in processed_value + assert " " in processed_value # Replacement char + + def test_api_compatibility(self): + """Test compatibility with Lake Formation API format.""" + # Simulate API response format + api_data = {"key": "data source type", "value": "PostgreSQL DB"} + tag = LakeFormationTag.from_dict(api_data) + + # Should be able to convert back to API format + api_output = tag.to_dict() + assert api_output["key"] == "data source type" # Original preserved + assert api_output["value"] == "PostgreSQL DB" # Original preserved + + # Display format should show processed values + display_output = tag.to_display_dict() + assert " " not in display_output["key"] # Should be sanitized 
+ + def test_empty_value_handling(self): + """Test that empty values are handled correctly.""" + # Empty string value should become None + tag = LakeFormationTag.from_key_value("key", "") + assert tag.value is None + + result = tag.to_dict() + assert "value" not in result or result.get("value") is None + + def test_equality_and_hashing(self): + """Test equality and hashing of LakeFormationTag objects.""" + tag1 = LakeFormationTag.from_key_value("environment", "production") + tag2 = LakeFormationTag.from_key_value("environment", "production") + tag3 = LakeFormationTag.from_key_value("environment", "staging") + + # Same tags should be equal + assert tag1 == tag2 + assert hash(tag1) == hash(tag2) + + # Different tags should not be equal + assert tag1 != tag3 + assert hash(tag1) != hash(tag3) + + def test_repr(self): + """Test repr representation of LakeFormationTag.""" + tag = LakeFormationTag.from_key_value("env", "prod") + repr_str = repr(tag) + assert "LakeFormationTag" in repr_str + assert "env" in repr_str + assert "prod" in repr_str + + class TestUnityCatalogTagKeyText: """Tests for UnityCatalogTagKeyText.""" def test_key_restrictions(self): """Test Unity Catalog key restrictions.""" - key_text = UnityCatalogTagKeyText("data-source/type@main!") + key_text = UnityCatalogTagKeyText(raw_text="data-source/type@main!") # Should replace invalid characters processed = str(key_text) @@ -172,20 +416,20 @@ class TestUnityCatalogTagKeyText: assert "@" in processed # Replacement character # Should preserve original - assert key_text.original == "data-source/type@main!" + assert key_text.raw_text == "data-source/type@main!" 
def test_key_length_limit(self): """Test Unity Catalog key length limit (127 chars).""" long_key = "a" * 260 # Longer than 127 chars - key_text = UnityCatalogTagKeyText(long_key) + key_text = UnityCatalogTagKeyText(raw_text=long_key) assert len(str(key_text)) <= 255 - assert key_text.original == long_key + assert key_text.raw_text == long_key def test_valid_key_characters(self): """Test that valid characters are preserved.""" valid_key = "environment_prod_v1_2" - key_text = UnityCatalogTagKeyText(valid_key) + key_text = UnityCatalogTagKeyText(raw_text=valid_key) # These should be preserved (valid UC characters) assert str(key_text) == valid_key @@ -197,7 +441,7 @@ class TestUnityCatalogTagValueText: def test_value_restrictions(self): """Test Unity Catalog value restrictions.""" value_text = UnityCatalogTagValueText( - "MySQL Database: 8.0 (Primary)\nProduction Instance" + raw_text="MySQL Database: 8.0 (Primary)\nProduction Instance" ) # Should replace control characters @@ -205,21 +449,21 @@ class TestUnityCatalogTagValueText: assert "\n" not in processed # Should preserve original - assert "\n" in value_text.original + assert "\n" in value_text.raw_text def test_value_length_limit(self): """Test Unity Catalog value length limit (1000 chars).""" long_value = "a" * 1010 # Longer than 1000 chars - value_text = UnityCatalogTagValueText(long_value) + value_text = UnityCatalogTagValueText(raw_text=long_value) assert len(str(value_text)) <= 1000 assert str(value_text).endswith("...") - assert value_text.original == long_value + assert value_text.raw_text == long_value def test_permissive_characters(self): """Test that most characters are allowed in values.""" complex_value = "MySQL: 8.0 (Primary) - Special chars: @#$%^&*" - value_text = UnityCatalogTagValueText(complex_value) + value_text = UnityCatalogTagValueText(raw_text=complex_value) # Most characters should be preserved (more permissive than keys) processed = str(value_text) @@ -235,18 +479,18 @@ class 
TestUnityCatalogTag: def test_from_key_value(self): """Test creating UnityCatalogTag from key/value.""" tag = UnityCatalogTag.from_key_value("environment", "production") - assert tag.key.original == "environment" + assert tag.key.raw_text == "environment" assert tag.value is not None - assert tag.value.original == "production" + assert tag.value.raw_text == "production" def test_from_dict(self): """Test creating UnityCatalogTag from dictionary.""" tag_dict = {"key": "team/owner", "value": "data-engineering@company.com"} tag = UnityCatalogTag.from_dict(tag_dict) - assert tag.key.original == "team/owner" + assert tag.key.raw_text == "team/owner" assert tag.value is not None - assert tag.value.original == "data-engineering@company.com" + assert tag.value.raw_text == "data-engineering@company.com" def test_to_dict(self): """Test converting UnityCatalogTag to dictionary.""" @@ -269,7 +513,7 @@ class TestUnityCatalogTag: def test_key_only_tag(self): """Test UnityCatalogTag with key only.""" tag = UnityCatalogTag.from_key_value("critical") - assert tag.key.original == "critical" + assert tag.key.raw_text == "critical" assert tag.value is None result = tag.to_dict() @@ -348,14 +592,14 @@ class TestIntegration: # Convert to UnityCatalogTag uc_tag = UnityCatalogTag.from_key_value( - external_tag.key.original, - external_tag.value.original if external_tag.value is not None else None, + external_tag.key.raw_text, + external_tag.value.raw_text if external_tag.value is not None else None, ) # Should have same original values - assert uc_tag.key.original == external_tag.key.original + assert uc_tag.key.raw_text == external_tag.key.raw_text if external_tag.value is not None and uc_tag.value is not None: - assert uc_tag.value.original == external_tag.value.original + assert uc_tag.value.raw_text == external_tag.value.raw_text # But different processing rules assert str(uc_tag.key) != str(external_tag.key) # Different sanitization @@ -391,22 +635,22 @@ class TestIntegration: for 
key, value in test_cases: # Test ExternalTag ext_tag = ExternalTag.from_key_value(key, value) - assert ext_tag.key.original == key + assert ext_tag.key.raw_text == key if value: assert ext_tag.value is not None - assert ext_tag.value.original == value + assert ext_tag.value.raw_text == value # Test round-trip through URN urn = ext_tag.to_datahub_tag_urn() parsed_tag = ExternalTag.from_urn(urn) - assert parsed_tag.key.original == key + assert parsed_tag.key.raw_text == key if value: assert parsed_tag.value is not None - assert parsed_tag.value.original == value + assert parsed_tag.value.raw_text == value # Test UnityCatalogTag uc_tag = UnityCatalogTag.from_key_value(key, value) - assert uc_tag.key.original == key + assert uc_tag.key.raw_text == key if value: assert uc_tag.value is not None - assert uc_tag.value.original == value + assert uc_tag.value.raw_text == value diff --git a/metadata-ingestion/tests/unit/glue/glue_mces_lake_formation_tags_golden.json b/metadata-ingestion/tests/unit/glue/glue_mces_lake_formation_tags_golden.json index e66ef4e699..4a86019a24 100644 --- a/metadata-ingestion/tests/unit/glue/glue_mces_lake_formation_tags_golden.json +++ b/metadata-ingestion/tests/unit/glue/glue_mces_lake_formation_tags_golden.json @@ -1,18 +1,18 @@ [ { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Environment:Production", + "primaryKey": "123412341234.Environment:Production", "secondaryKeys": [ "urn:li:tag:Environment:Production" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": 
false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -27,7 +27,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -43,7 +43,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -59,18 +59,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Environment:Test", + "primaryKey": "123412341234.Environment:Test", "secondaryKeys": [ "urn:li:tag:Environment:Test" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", 
\"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -85,7 +85,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -101,7 +101,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -117,18 +117,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Owner:DataTeam", + "primaryKey": "123412341234.Owner:DataTeam", "secondaryKeys": [ "urn:li:tag:Owner:DataTeam" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -143,7 +143,7 @@ 
}, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -159,7 +159,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -294,18 +294,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Environment:Production", + "primaryKey": "123412341234.Environment:Production", "secondaryKeys": [ "urn:li:tag:Environment:Production" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -320,7 +320,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": 
"UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -336,7 +336,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -352,18 +352,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Environment:Test", + "primaryKey": "123412341234.Environment:Test", "secondaryKeys": [ "urn:li:tag:Environment:Test" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -378,7 +378,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -394,7 +394,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": 
"urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -410,18 +410,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Owner:DataTeam", + "primaryKey": "123412341234.Owner:DataTeam", "secondaryKeys": [ "urn:li:tag:Owner:DataTeam" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -436,7 +436,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -452,7 +452,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -584,18 +584,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + 
"entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Environment:Production", + "primaryKey": "123412341234.Environment:Production", "secondaryKeys": [ "urn:li:tag:Environment:Production" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Production\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Production\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -610,7 +610,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -626,7 +626,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:1d46304f5450edddc3b2208fdf442136", + "entityUrn": "urn:li:platformResource:5deeb4819bf87899250f0da32e6af0cf", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -642,18 +642,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - 
"primaryKey": "Environment:Test", + "primaryKey": "123412341234.Environment:Test", "secondaryKeys": [ "urn:li:tag:Environment:Test" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Environment:Test\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Environment\", \"tag_value\": \"Test\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -668,7 +668,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -684,7 +684,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:25417c4f65df9f764d936705e2c95c0d", + "entityUrn": "urn:li:platformResource:fc4d54a1388a90c25f277d611d201274", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -700,18 +700,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "Owner:DataTeam", + "primaryKey": "123412341234.Owner:DataTeam", "secondaryKeys": [ "urn:li:tag:Owner:DataTeam" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, 
\"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:Owner:DataTeam\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"Owner\", \"tag_value\": \"DataTeam\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -726,7 +726,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -742,7 +742,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:7afa98a893b156f801a5e02146f24db4", + "entityUrn": "urn:li:platformResource:ecd80d8c7aa8c99d3ab7e6ca52b16b47", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -874,18 +874,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf", + "entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "DataClassification:Sensitive", + "primaryKey": "123412341234.DataClassification:Sensitive", "secondaryKeys": [ "urn:li:tag:DataClassification:Sensitive" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:DataClassification:Sensitive\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": 
"{\"datahub_urns\": {\"urns\": [\"urn:li:tag:DataClassification:Sensitive\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"DataClassification\", \"tag_value\": \"Sensitive\", \"platform_instance\": null, \"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -900,7 +900,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf", + "entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -916,7 +916,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:2ef8f1035573e7cb66b2063296be56bf", + "entityUrn": "urn:li:platformResource:a8bd83f07806fbe4495337fdf27831c7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -932,18 +932,18 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34", + "entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6", "changeType": "UPSERT", "aspectName": "platformResourceInfo", "aspect": { "json": { "resourceType": "LakeFormationTagPlatformResource", - "primaryKey": "BusinessUnit:Finance", + "primaryKey": "123412341234.BusinessUnit:Finance", "secondaryKeys": [ "urn:li:tag:BusinessUnit:Finance" ], "value": { - "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:BusinessUnit:Finance\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"\", \"tag_value\": \"\", \"platform_instance\": null, \"catalog\": null, \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", + "blob": "{\"datahub_urns\": {\"urns\": [\"urn:li:tag:BusinessUnit:Finance\"]}, \"managed_by_datahub\": false, \"id\": {\"tag_key\": \"BusinessUnit\", \"tag_value\": \"Finance\", \"platform_instance\": null, 
\"catalog\": \"123412341234\", \"exists_in_lake_formation\": false, \"persisted\": false}, \"allowed_values\": null}", "contentType": "JSON", "schemaType": "JSON", "schemaRef": "LakeFormationTagPlatformResource" @@ -958,7 +958,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34", + "entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -974,7 +974,7 @@ }, { "entityType": "platformResource", - "entityUrn": "urn:li:platformResource:683431632ff6ebbfa42920f8cec9df34", + "entityUrn": "urn:li:platformResource:3bdfff6730064bb49dcb275be445d1a6", "changeType": "UPSERT", "aspectName": "status", "aspect": {