Normalize a case-mismatched entityType on MetadataChangeProposalWrapper.

When entityType matches the type guessed from entityUrn except for letter
case, the wrapper now rewrites entityType to the guessed value and emits a
DataHubDeprecationWarning instead of silently accepting it. A mismatch that
is more than a case difference still raises ValueError.

The warning is emitted BEFORE entityType is overwritten so that the message
reports the value the caller actually passed.

NOTE: the blob hashes on the "index" lines are carried over from the original
patch and were not recomputed after editing the hunks.

diff --git a/metadata-ingestion/src/datahub/emitter/mcp.py b/metadata-ingestion/src/datahub/emitter/mcp.py
index 2102696df6..12ca39d136 100644
--- a/metadata-ingestion/src/datahub/emitter/mcp.py
+++ b/metadata-ingestion/src/datahub/emitter/mcp.py
@@ -1,9 +1,11 @@
 import dataclasses
 import json
+import warnings
 from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union
 
 from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE
 from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform
+from datahub.errors import DataHubDeprecationWarning
 from datahub.metadata.schema_classes import (
     ChangeTypeClass,
     DictWrapper,
@@ -75,13 +77,23 @@ class MetadataChangeProposalWrapper:
         if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET:
             self.entityType = guess_entity_type(self.entityUrn)
         elif self.entityUrn and self.entityType:
-            guessed_entity_type = guess_entity_type(self.entityUrn).lower()
-            # Entity type checking is actually case insensitive.
-            # Note that urns are case sensitive, but entity types are not.
-            if self.entityType.lower() != guessed_entity_type:
+            guessed_entity_type = guess_entity_type(self.entityUrn)
+            if self.entityType.lower() != guessed_entity_type.lower():
+                # If they aren't a case-ignored match, raise an error.
                 raise ValueError(
                     f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}",
                 )
+            elif self.entityType != guessed_entity_type:
+                # If they only differ in case, normalize and print a warning.
+                # Warn first so the message reports the value the caller passed.
+                warnings.warn(
+                    f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. "
+                    "This will be automatically corrected for now, but will become an error in a future release. "
+                    "Note that the entityType field is optional and will be automatically inferred from the entityUrn.",
+                    DataHubDeprecationWarning,
+                    stacklevel=3,
+                )
+                self.entityType = guessed_entity_type
         elif self.entityType == _ENTITY_TYPE_UNSET:
             raise ValueError("entityType must be set if entityUrn is not set")
 
diff --git a/metadata-ingestion/src/datahub/errors.py b/metadata-ingestion/src/datahub/errors.py
index 8ea38c2e2e..7230fd0f40 100644
--- a/metadata-ingestion/src/datahub/errors.py
+++ b/metadata-ingestion/src/datahub/errors.py
@@ -41,3 +41,7 @@ class ExperimentalWarning(Warning):
 
 class APITracingWarning(Warning):
     pass
+
+
+class DataHubDeprecationWarning(DeprecationWarning):
+    pass
diff --git a/metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py b/metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py
index ed67467e77..87e0441f79 100644
--- a/metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py
+++ b/metadata-ingestion/tests/unit/sdk/test_mcp_wrapper.py
@@ -2,9 +2,10 @@ import pytest
 
 import datahub.metadata.schema_classes as models
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.errors import DataHubDeprecationWarning
 
 
-def test_mcpw_inference():
+def test_mcpw_inference() -> None:
     mcpw = MetadataChangeProposalWrapper(
         entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
         aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
@@ -20,7 +21,17 @@ def test_mcpw_inference():
     )
 
 
-def test_mcpw_from_obj():
+def test_mcpw_case_coercion() -> None:
+    with pytest.warns(DataHubDeprecationWarning, match="passed entityType DATASET"):
+        mcpw = MetadataChangeProposalWrapper(
+            entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)",
+            entityType="DATASET",
+            aspect=models.DomainsClass(domains=["urn:li:domain:health"]),
+        )
+    assert mcpw.entityType == "dataset"
+
+
+def test_mcpw_from_obj() -> None:
     # Checks that the MCPW from_obj() method returns a MCPW instead
     # of an MCP with a serialized inner aspect object.
 