mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-25 18:38:55 +00:00
feat(sdk): auto-fix bad entity type casing (#13218)
This commit is contained in:
parent
5ba8b7d173
commit
64bda48b51
@ -1,9 +1,11 @@
|
|||||||
import dataclasses
|
import dataclasses
|
||||||
import json
|
import json
|
||||||
|
import warnings
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
|
||||||
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
|
||||||
|
from datahub.errors import DataHubDeprecationWarning
|
||||||
from datahub.metadata.schema_classes import (
|
from datahub.metadata.schema_classes import (
|
||||||
ChangeTypeClass,
|
ChangeTypeClass,
|
||||||
DictWrapper,
|
DictWrapper,
|
||||||
@ -75,13 +77,22 @@ class MetadataChangeProposalWrapper:
|
|||||||
if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
    # No entity type was supplied: derive it from the urn.
    self.entityType = guess_entity_type(self.entityUrn)
elif self.entityUrn and self.entityType:
    # Both were supplied: make sure they agree.
    # Note that urns are case sensitive, but entity types are not.
    guessed_entity_type = guess_entity_type(self.entityUrn)
    if self.entityType.lower() != guessed_entity_type.lower():
        # If they aren't a case-ignored match, raise an error.
        raise ValueError(
            f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
        )
    elif self.entityType != guessed_entity_type:
        # If they only differ in case, normalize and print a warning.
        # Keep the caller's original value so the warning reports what was
        # actually passed; reading self.entityType after the assignment
        # below would print the already-corrected value twice.
        passed_entity_type = self.entityType
        self.entityType = guessed_entity_type
        # NOTE(review): stacklevel=3 presumably attributes the warning to
        # the code constructing the wrapper rather than to this method or
        # the generated __init__ — confirm against the enclosing method.
        warnings.warn(
            f"The passed entityType {passed_entity_type} differs in case from the expected entity type {guessed_entity_type}. "
            "This will be automatically corrected for now, but will become an error in a future release. "
            "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
            DataHubDeprecationWarning,
            stacklevel=3,
        )
elif self.entityType == _ENTITY_TYPE_UNSET:
    # Without a urn there is nothing to infer the entity type from.
    raise ValueError("entityType must be set if entityUrn is not set")
|
||||||
|
|
||||||
|
@ -41,3 +41,7 @@ class ExperimentalWarning(Warning):
|
|||||||
|
|
||||||
class APITracingWarning(Warning):
|
class APITracingWarning(Warning):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DataHubDeprecationWarning(DeprecationWarning):
|
||||||
|
pass
|
||||||
|
@ -2,9 +2,10 @@ import pytest
|
|||||||
|
|
||||||
import datahub.metadata.schema_classes as models
|
import datahub.metadata.schema_classes as models
|
||||||
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
||||||
|
from datahub.errors import DataHubDeprecationWarning
|
||||||
|
|
||||||
|
|
||||||
def test_mcpw_inference():
|
def test_mcpw_inference() -> None:
|
||||||
mcpw = MetadataChangeProposalWrapper(
|
mcpw = MetadataChangeProposalWrapper(
|
||||||
entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
|
entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
|
||||||
aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
|
aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
|
||||||
@ -20,7 +21,17 @@ def test_mcpw_inference():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_mcpw_from_obj():
|
def test_mcpw_case_coercion() -> None:
|
||||||
|
with pytest.warns(DataHubDeprecationWarning):
|
||||||
|
mcpw = MetadataChangeProposalWrapper(
|
||||||
|
entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
|
||||||
|
entityType="DATASET",
|
||||||
|
aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
|
||||||
|
)
|
||||||
|
assert mcpw.entityType == "dataset"
|
||||||
|
|
||||||
|
|
||||||
|
def test_mcpw_from_obj() -> None:
|
||||||
# Checks that the MCPW from_obj() method returns a MCPW instead
|
# Checks that the MCPW from_obj() method returns a MCPW instead
|
||||||
# of an MCP with a serialized inner aspect object.
|
# of an MCP with a serialized inner aspect object.
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user