mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-25 07:54:37 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			234 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			234 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Any, Dict
 | |
| 
 | |
| from datahub.metadata.com.linkedin.pegasus2avro.common import GlobalTags
 | |
| from datahub.metadata.schema_classes import (
 | |
|     GlobalTagsClass,
 | |
|     GlossaryTermsClass,
 | |
|     OwnerClass,
 | |
|     OwnershipClass,
 | |
|     OwnershipSourceTypeClass,
 | |
|     OwnershipTypeClass,
 | |
| )
 | |
| from datahub.utilities.mapping import OperationProcessor
 | |
| 
 | |
| 
 | |
def get_operation_defs() -> Dict[str, Any]:
    """Build the operation definitions shared by the tests in this module.

    The result maps a property name to a rule made of three entries:
    ``match`` (a pattern string or a literal bool/int/float), ``operation``
    (``add_owner``, ``add_tag`` or ``add_term``) and the operation's
    ``config``.  A new dictionary is created on every call.
    """
    operation_defs: Dict[str, Any] = {}

    # Ownership rules: all three use the catch-all ".*" pattern.
    for property_name, owner_type in (
        ("user_owner", "user"),
        ("user_owner_2", "user"),
        ("group.owner", "group"),
    ):
        operation_defs[property_name] = {
            "match": ".*",
            "operation": "add_owner",
            "config": {"owner_type": owner_type},
        }

    # Rules whose match criterion is a literal value rather than a string.
    operation_defs["pii"] = {
        "match": True,
        "operation": "add_tag",
        "config": {"tag": "has_pii_test"},
    }
    operation_defs["int_property"] = {
        "match": 1,
        "operation": "add_tag",
        "config": {"tag": "int_property"},
    }
    operation_defs["double_property"] = {
        "match": 2.5,
        "operation": "add_term",
        "config": {"term": "double_property"},
    }

    # String-pattern rules producing a glossary term and a templated tag.
    operation_defs["governance.team_owner"] = {
        "match": "Finan.*",
        "operation": "add_term",
        "config": {"term": "Finance.test"},
    }
    operation_defs["tag"] = {
        "match": ".*",
        "operation": "add_tag",
        "config": {"tag": "{{ $match }}"},
    }
    return operation_defs
 | |
| 
 | |
| 
 | |
def test_operation_processor_not_matching():
    """Expect no aspect at all when none of the properties satisfies a rule."""
    processor = OperationProcessor(get_operation_defs())

    # The first two keys are not present in the definitions; the remaining
    # ones carry values the test expects not to satisfy their rule.
    aspect_map = processor.process(
        {
            "user_owner_test": "test_user@abc.com",
            "group.owner_test": "test.group@abc.co.in",
            "governance.team_owner": "Binance",
            "pii": False,
            "int_property": 3,
            "double_property": 25,
        }
    )

    for operation_name in ("add_tag", "add_term", "add_owner"):
        assert operation_name not in aspect_map
 | |
| 
 | |
| 
 | |
def test_operation_processor_matching():
    """Check the aspects produced when every property satisfies its rule.

    The processor is built from ``get_operation_defs()`` with
    ``owner_source_type="SOURCE_CONTROL"`` and ``strip_owner_email_id=True``.
    The test then verifies the tag, glossary-term and ownership aspects
    returned by ``process``.
    """
    raw_props = {
        "user_owner": "test_user@abc.com",
        "user_owner_2": "test_user_2",
        "group.owner": "test.group@abc.co.in",
        "governance.team_owner": "Finance",
        "pii": True,
        "int_property": 1,
        "double_property": 2.5,
        "tag": "Finance",
    }
    processor = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SOURCE_CONTROL",
        strip_owner_email_id=True,
    )
    aspect_map = processor.process(raw_props)
    assert "add_tag" in aspect_map
    assert "add_term" in aspect_map
    assert "add_owner" in aspect_map

    # One assert per expectation so a failure reports exactly which
    # tag/term is missing instead of a single opaque boolean chain.
    tag_aspect: GlobalTags = aspect_map["add_tag"]
    tags_added = [tag_association.tag for tag_association in tag_aspect.tags]
    assert len(tags_added) == 3
    assert "urn:li:tag:has_pii_test" in tags_added
    assert "urn:li:tag:int_property" in tags_added
    assert "urn:li:tag:Finance" in tags_added

    term_aspect: GlossaryTermsClass = aspect_map["add_term"]
    terms_added = [term_association.urn for term_association in term_aspect.terms]
    assert len(terms_added) == 2
    assert "urn:li:glossaryTerm:Finance.test" in terms_added
    assert "urn:li:glossaryTerm:double_property" in terms_added

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    assert len(ownership_aspect.owners) == 3
    expected_owners = {
        "urn:li:corpuser:test_user",
        "urn:li:corpuser:test_user_2",
        "urn:li:corpGroup:test.group",
    }
    # Set equality combined with the length check above guarantees each
    # expected owner appears exactly once; a per-owner membership check
    # alone would also accept the same owner repeated three times.
    assert {owner.owner for owner in ownership_aspect.owners} == expected_owners
    for single_owner in ownership_aspect.owners:
        assert single_owner.source
        assert single_owner.source.type == OwnershipSourceTypeClass.SOURCE_CONTROL
 | |
| 
 | |
| 
 | |
def test_operation_processor_no_email_strip_source_type_not_null():
    """With ``strip_owner_email_id=False`` the owner urn keeps the full e-mail.

    Also checks that the ``owner_source_type`` given to the processor
    ("SERVICE") is the type of the resulting owner's source.
    """
    processor = OperationProcessor(
        operation_defs=get_operation_defs(),
        owner_source_type="SERVICE",
        strip_owner_email_id=False,
    )
    aspect_map = processor.process({"user_owner": "test_user@abc.com"})
    assert "add_owner" in aspect_map

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    owners = ownership_aspect.owners
    assert len(owners) == 1

    sole_owner: OwnerClass = owners[0]
    assert sole_owner.owner == "urn:li:corpuser:test_user@abc.com"
    owner_source = sole_owner.source
    assert owner_source and owner_source.type == "SERVICE"
 | |
| 
 | |
| 
 | |
def test_operation_processor_advanced_matching_owners():
    """A rule with the pattern ``^@(.*)`` yields a group owner without the "@".

    The expected urn contains only the text after the leading "@" of the
    raw property value.
    """
    owner_rule = {
        "match": "^@(.*)",
        "operation": "add_owner",
        "config": {"owner_type": "group"},
    }
    processor = OperationProcessor(
        operation_defs={"user_owner": owner_rule},
        owner_source_type="SOURCE_CONTROL",
    )
    aspect_map = processor.process({"user_owner": "@test_user@abc.com"})
    assert "add_owner" in aspect_map

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    owners = ownership_aspect.owners
    assert len(owners) == 1

    sole_owner: OwnerClass = owners[0]
    assert sole_owner.owner == "urn:li:corpGroup:test_user@abc.com"
    owner_source = sole_owner.source
    assert owner_source and owner_source.type == "SOURCE_CONTROL"
 | |
| 
 | |
| 
 | |
def test_operation_processor_ownership_category():
    """The ``owner_category`` from a rule's config becomes the owner's type.

    Two rules are configured (a group data steward and a user business
    owner).  The resulting owners are checked positionally, in the same
    order as the rules are declared.
    """
    steward_rule = {
        "match": "^@(.*)",
        "operation": "add_owner",
        "config": {
            "owner_type": "group",
            "owner_category": OwnershipTypeClass.DATA_STEWARD,
        },
    }
    business_rule = {
        "match": ".*",
        "operation": "add_owner",
        "config": {
            "owner_type": "user",
            "owner_category": OwnershipTypeClass.BUSINESS_OWNER,
        },
    }
    processor = OperationProcessor(
        operation_defs={"user_owner": steward_rule, "business_owner": business_rule},
        owner_source_type="SOURCE_CONTROL",
    )
    aspect_map = processor.process(
        {"user_owner": "@test_user", "business_owner": "alice"}
    )
    assert "add_owner" in aspect_map

    ownership_aspect: OwnershipClass = aspect_map["add_owner"]
    assert len(ownership_aspect.owners) == 2

    # (expected urn, expected ownership type) for owners[0] and owners[1].
    expectations = (
        ("urn:li:corpGroup:test_user", OwnershipTypeClass.DATA_STEWARD),
        ("urn:li:corpuser:alice", OwnershipTypeClass.BUSINESS_OWNER),
    )
    for position, (expected_urn, expected_type) in enumerate(expectations):
        new_owner: OwnerClass = ownership_aspect.owners[position]
        assert new_owner.owner == expected_urn
        assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL"
        assert new_owner.type and new_owner.type == expected_type
 | |
| 
 | |
| 
 | |
def test_operation_processor_advanced_matching_tags():
    """A ``{{ $match }}`` placeholder in the tag config is filled from the value.

    With the pattern ``^PLT-(.*)`` and the value "PLT-4567", the expected tag
    urn ends in ``case_4567``.
    """
    case_rule = {
        "match": "^PLT-(.*)",
        "operation": "add_tag",
        "config": {"tag": "case_{{ $match }}"},
    }
    processor = OperationProcessor(
        operation_defs={"case": case_rule},
        owner_source_type="SOURCE_CONTROL",
    )
    aspect_map = processor.process({"case": "PLT-4567"})
    assert "add_tag" in aspect_map

    tag_aspect: GlobalTagsClass = aspect_map["add_tag"]
    produced_tags = tag_aspect.tags
    assert len(produced_tags) == 1
    assert produced_tags[0].tag == "urn:li:tag:case_4567"
 | 
