mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 02:37:05 +00:00 
			
		
		
		
	feat(sdk): auto-fix bad entity type casing (#13218)
This commit is contained in:
		
							parent
							
								
									5ba8b7d173
								
							
						
					
					
						commit
						64bda48b51
					
				| @ -1,9 +1,11 @@ | |||||||
| import dataclasses | import dataclasses | ||||||
| import json | import json | ||||||
|  | import warnings | ||||||
| from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union | from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union | ||||||
| 
 | 
 | ||||||
| from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE | from datahub.emitter.aspect import ASPECT_MAP, JSON_CONTENT_TYPE | ||||||
| from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform | from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform | ||||||
|  | from datahub.errors import DataHubDeprecationWarning | ||||||
| from datahub.metadata.schema_classes import ( | from datahub.metadata.schema_classes import ( | ||||||
|     ChangeTypeClass, |     ChangeTypeClass, | ||||||
|     DictWrapper, |     DictWrapper, | ||||||
| @ -75,13 +77,22 @@ class MetadataChangeProposalWrapper: | |||||||
|         if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET: |         if self.entityUrn and self.entityType == _ENTITY_TYPE_UNSET: | ||||||
|             self.entityType = guess_entity_type(self.entityUrn) |             self.entityType = guess_entity_type(self.entityUrn) | ||||||
|         elif self.entityUrn and self.entityType: |         elif self.entityUrn and self.entityType: | ||||||
|             guessed_entity_type = guess_entity_type(self.entityUrn).lower() |             guessed_entity_type = guess_entity_type(self.entityUrn) | ||||||
|             # Entity type checking is actually case insensitive. |             if self.entityType.lower() != guessed_entity_type.lower(): | ||||||
|             # Note that urns are case sensitive, but entity types are not. |                 # If they aren't a case-ignored match, raise an error. | ||||||
|             if self.entityType.lower() != guessed_entity_type: |  | ||||||
|                 raise ValueError( |                 raise ValueError( | ||||||
|                     f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}", |                     f"entityType {self.entityType} does not match the entity type {guessed_entity_type} from entityUrn {self.entityUrn}", | ||||||
|                 ) |                 ) | ||||||
|  |             elif self.entityType != guessed_entity_type: | ||||||
|  |                 # If they only differ in case, normalize and print a warning. | ||||||
|  |                 self.entityType = guessed_entity_type | ||||||
|  |                 warnings.warn( | ||||||
|  |                     f"The passed entityType {self.entityType} differs in case from the expected entity type {guessed_entity_type}. " | ||||||
|  |                     "This will be automatically corrected for now, but will become an error in a future release. " | ||||||
|  |                     "Note that the entityType field is optional and will be automatically inferred from the entityUrn.", | ||||||
|  |                     DataHubDeprecationWarning, | ||||||
|  |                     stacklevel=3, | ||||||
|  |                 ) | ||||||
|         elif self.entityType == _ENTITY_TYPE_UNSET: |         elif self.entityType == _ENTITY_TYPE_UNSET: | ||||||
|             raise ValueError("entityType must be set if entityUrn is not set") |             raise ValueError("entityType must be set if entityUrn is not set") | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -41,3 +41,7 @@ class ExperimentalWarning(Warning): | |||||||
| 
 | 
 | ||||||
| class APITracingWarning(Warning): | class APITracingWarning(Warning): | ||||||
|     pass |     pass | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DataHubDeprecationWarning(DeprecationWarning): | ||||||
|  |     pass | ||||||
|  | |||||||
| @ -2,9 +2,10 @@ import pytest | |||||||
| 
 | 
 | ||||||
| import datahub.metadata.schema_classes as models | import datahub.metadata.schema_classes as models | ||||||
| from datahub.emitter.mcp import MetadataChangeProposalWrapper | from datahub.emitter.mcp import MetadataChangeProposalWrapper | ||||||
|  | from datahub.errors import DataHubDeprecationWarning | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_mcpw_inference(): | def test_mcpw_inference() -> None: | ||||||
|     mcpw = MetadataChangeProposalWrapper( |     mcpw = MetadataChangeProposalWrapper( | ||||||
|         entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)", |         entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)", | ||||||
|         aspect=models.DomainsClass(domains=["urn:li:domain:health"]), |         aspect=models.DomainsClass(domains=["urn:li:domain:health"]), | ||||||
| @ -20,7 +21,17 @@ def test_mcpw_inference(): | |||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_mcpw_from_obj(): | def test_mcpw_case_coercion() -> None: | ||||||
|  |     with pytest.warns(DataHubDeprecationWarning): | ||||||
|  |         mcpw = MetadataChangeProposalWrapper( | ||||||
|  |             entityUrn="urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)", | ||||||
|  |             entityType="DATASET", | ||||||
|  |             aspect=models.DomainsClass(domains=["urn:li:domain:health"]), | ||||||
|  |         ) | ||||||
|  |     assert mcpw.entityType == "dataset" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_mcpw_from_obj() -> None: | ||||||
|     # Checks that the MCPW from_obj() method returns a MCPW instead |     # Checks that the MCPW from_obj() method returns a MCPW instead | ||||||
|     # of an MCP with a serialized inner aspect object. |     # of an MCP with a serialized inner aspect object. | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Harshal Sheth
						Harshal Sheth