feat(sdk): auto-fix bad entity type casing (#13218)

This commit is contained in:
Harshal Sheth 2025-04-21 00:40:00 -04:00 committed by GitHub
parent 5ba8b7d173
commit 64bda48b51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 6 deletions

View File

@ -1,9 +1,11 @@
import dataclasses
import json
import warnings
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
from datahub.errors import DataHubDeprecationWarning
from datahub.metadata.schema_classes import (
ChangeTypeClass,
DictWrapper,
@ -75,13 +77,22 @@ class MetadataChangeProposalWrapper:
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
self.entityType = guess_entity_type(self.entityUrn)
elif self.entityUrn and self.entityType:
guessed_entity_type = guess_entity_type(self.entityUrn).lower()
# Entity type checking is actually case insensitive.
# Note that urns are case sensitive, but entity types are not.
if self.entityType.lower() != guessed_entity_type:
guessed_entity_type = guess_entity_type(self.entityUrn)
if self.entityType.lower() != guessed_entity_type.lower():
# If they aren't a case-ignored match, raise an error.
raise ValueError(
f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
)
elif self.entityType != guessed_entity_type:
# If they only differ in case, normalize and print a warning.
self.entityType = guessed_entity_type
warnings.warn(
f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
"This will be automatically corrected for now, but will become an error in a future release. "
"Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
DataHubDeprecationWarning,
stacklevel=3,
)
elif self.entityType == _ENTITY_TYPE_UNSET:
raise ValueError("entityType must be set if entityUrn is not set")

View File

@ -41,3 +41,7 @@ class ExperimentalWarning(Warning):
class APITracingWarning(Warning):
    """Warning category for API tracing.

    NOTE(review): the code that emits this category is not visible here;
    confirm the exact trigger conditions against the emitting call sites.
    """
class DataHubDeprecationWarning(DeprecationWarning):
    """Deprecation warning category specific to DataHub.

    Subclasses the built-in ``DeprecationWarning`` so that it can be
    filtered or asserted on separately from deprecation warnings raised
    by other libraries.
    """

View File

@ -2,9 +2,10 @@ import pytest
import datahub.metadata.schema_classes as models
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.errors import DataHubDeprecationWarning
def test_mcpw_inference():
def test_mcpw_inference() -> None:
mcpw = MetadataChangeProposalWrapper(
entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
@ -20,7 +21,17 @@ def test_mcpw_inference():
)
def test_mcpw_from_obj():
def test_mcpw_case_coercion() -> None:
    """A wrongly-cased entityType is normalized and a deprecation warning is emitted."""
    dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)"

    # Constructing the wrapper with an upper-cased entity type must warn.
    with pytest.warns(DataHubDeprecationWarning):
        wrapper = MetadataChangeProposalWrapper(
            entityUrn=dataset_urn,
            entityType="DATASET",
            aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
        )

    # The stored entity type is the casing derived from the urn.
    assert wrapper.entityType == "dataset"
def test_mcpw_from_obj() -> None:
# Checks that the MCPW from_obj() method returns a MCPW instead
# of an MCP with a serialized inner aspect object.