2022-01-05 16:32:05 -08:00
|
|
|
from typing import Any, Dict
|
|
|
|
|
|
|
|
from datahub.metadata.com.linkedin.pegasus2avro.common import GlobalTags
|
|
|
|
from datahub.metadata.schema_classes import (
|
2022-06-24 04:43:12 -07:00
|
|
|
GlobalTagsClass,
|
2022-01-05 16:32:05 -08:00
|
|
|
GlossaryTermsClass,
|
|
|
|
OwnerClass,
|
|
|
|
OwnershipClass,
|
|
|
|
OwnershipSourceTypeClass,
|
2022-06-24 04:43:12 -07:00
|
|
|
OwnershipTypeClass,
|
2022-01-05 16:32:05 -08:00
|
|
|
)
|
|
|
|
from datahub.utilities.mapping import OperationProcessor
|
|
|
|
|
|
|
|
|
|
|
|
def get_operation_defs() -> Dict[str, Any]:
|
|
|
|
return {
|
|
|
|
"user_owner": {
|
|
|
|
"match": ".*",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {"owner_type": "user"},
|
|
|
|
},
|
|
|
|
"user_owner_2": {
|
|
|
|
"match": ".*",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {"owner_type": "user"},
|
|
|
|
},
|
|
|
|
"group.owner": {
|
|
|
|
"match": ".*",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {"owner_type": "group"},
|
|
|
|
},
|
|
|
|
"pii": {
|
|
|
|
"match": True,
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "has_pii_test"},
|
|
|
|
},
|
|
|
|
"int_property": {
|
|
|
|
"match": 1,
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "int_property"},
|
|
|
|
},
|
|
|
|
"double_property": {
|
|
|
|
"match": 2.5,
|
|
|
|
"operation": "add_term",
|
|
|
|
"config": {"term": "double_property"},
|
|
|
|
},
|
|
|
|
"governance.team_owner": {
|
|
|
|
"match": "Finan.*",
|
|
|
|
"operation": "add_term",
|
|
|
|
"config": {"term": "Finance.test"},
|
|
|
|
},
|
2022-04-25 19:56:45 +02:00
|
|
|
"tag": {
|
|
|
|
"match": ".*",
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "{{ $match }}"},
|
|
|
|
},
|
2022-01-05 16:32:05 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_not_matching():
|
|
|
|
# no property matches to the rules
|
|
|
|
raw_props = {
|
|
|
|
"user_owner_test": "test_user@abc.com",
|
|
|
|
"group.owner_test": "test.group@abc.co.in",
|
|
|
|
"governance.team_owner": "Binance",
|
|
|
|
"pii": False,
|
|
|
|
"int_property": 3,
|
|
|
|
"double_property": 25,
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(get_operation_defs())
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_tag" not in aspect_map
|
|
|
|
assert "add_term" not in aspect_map
|
|
|
|
assert "add_owner" not in aspect_map
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_matching():
|
|
|
|
raw_props = {
|
|
|
|
"user_owner": "test_user@abc.com",
|
|
|
|
"user_owner_2": "test_user_2",
|
|
|
|
"group.owner": "test.group@abc.co.in",
|
|
|
|
"governance.team_owner": "Finance",
|
|
|
|
"pii": True,
|
|
|
|
"int_property": 1,
|
|
|
|
"double_property": 2.5,
|
2022-04-25 19:56:45 +02:00
|
|
|
"tag": "Finance",
|
2022-01-05 16:32:05 -08:00
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs=get_operation_defs(),
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
strip_owner_email_id=True,
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_tag" in aspect_map
|
|
|
|
assert "add_term" in aspect_map
|
|
|
|
assert "add_owner" in aspect_map
|
|
|
|
tag_aspect: GlobalTags = aspect_map["add_tag"]
|
|
|
|
tags_added = [
|
|
|
|
tag_association_class.tag for tag_association_class in tag_aspect.tags
|
|
|
|
]
|
|
|
|
term_aspect: GlossaryTermsClass = aspect_map["add_term"]
|
|
|
|
terms_added = [
|
|
|
|
term_association_class.urn for term_association_class in term_aspect.terms
|
|
|
|
]
|
|
|
|
assert (
|
2022-04-25 19:56:45 +02:00
|
|
|
len(tags_added) == 3
|
2022-01-05 16:32:05 -08:00
|
|
|
and "urn:li:tag:has_pii_test" in tags_added
|
|
|
|
and "urn:li:tag:int_property" in tags_added
|
2022-04-25 19:56:45 +02:00
|
|
|
and "urn:li:tag:Finance" in tags_added
|
2022-01-05 16:32:05 -08:00
|
|
|
)
|
|
|
|
assert (
|
|
|
|
len(terms_added) == 2
|
|
|
|
and "urn:li:glossaryTerm:Finance.test" in terms_added
|
|
|
|
and "urn:li:glossaryTerm:double_property" in terms_added
|
|
|
|
)
|
|
|
|
|
|
|
|
ownership_aspect: OwnershipClass = aspect_map["add_owner"]
|
|
|
|
assert len(ownership_aspect.owners) == 3
|
|
|
|
owner_set = {
|
|
|
|
"urn:li:corpuser:test_user",
|
|
|
|
"urn:li:corpuser:test_user_2",
|
|
|
|
"urn:li:corpGroup:test.group",
|
|
|
|
}
|
|
|
|
for single_owner in ownership_aspect.owners:
|
|
|
|
assert single_owner.owner in owner_set
|
|
|
|
assert (
|
|
|
|
single_owner.source
|
|
|
|
and single_owner.source.type == OwnershipSourceTypeClass.SOURCE_CONTROL
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_no_email_strip_source_type_not_null():
|
|
|
|
raw_props = {
|
|
|
|
"user_owner": "test_user@abc.com",
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs=get_operation_defs(),
|
|
|
|
owner_source_type="SERVICE",
|
|
|
|
strip_owner_email_id=False,
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_owner" in aspect_map
|
|
|
|
|
|
|
|
ownership_aspect: OwnershipClass = aspect_map["add_owner"]
|
|
|
|
assert len(ownership_aspect.owners) == 1
|
|
|
|
new_owner: OwnerClass = ownership_aspect.owners[0]
|
|
|
|
assert new_owner.owner == "urn:li:corpuser:test_user@abc.com"
|
|
|
|
assert new_owner.source and new_owner.source.type == "SERVICE"
|
2022-06-24 04:43:12 -07:00
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_advanced_matching_owners():
|
|
|
|
raw_props = {
|
|
|
|
"user_owner": "@test_user@abc.com",
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs={
|
|
|
|
"user_owner": {
|
|
|
|
"match": "^@(.*)",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {"owner_type": "group"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_owner" in aspect_map
|
|
|
|
|
|
|
|
ownership_aspect: OwnershipClass = aspect_map["add_owner"]
|
|
|
|
assert len(ownership_aspect.owners) == 1
|
|
|
|
new_owner: OwnerClass = ownership_aspect.owners[0]
|
|
|
|
assert new_owner.owner == "urn:li:corpGroup:test_user@abc.com"
|
|
|
|
assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL"
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_ownership_category():
|
|
|
|
raw_props = {"user_owner": "@test_user", "business_owner": "alice"}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs={
|
|
|
|
"user_owner": {
|
|
|
|
"match": "^@(.*)",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {
|
|
|
|
"owner_type": "group",
|
|
|
|
"owner_category": OwnershipTypeClass.DATA_STEWARD,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"business_owner": {
|
|
|
|
"match": ".*",
|
|
|
|
"operation": "add_owner",
|
|
|
|
"config": {
|
|
|
|
"owner_type": "user",
|
|
|
|
"owner_category": OwnershipTypeClass.BUSINESS_OWNER,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_owner" in aspect_map
|
|
|
|
|
|
|
|
ownership_aspect: OwnershipClass = aspect_map["add_owner"]
|
|
|
|
assert len(ownership_aspect.owners) == 2
|
|
|
|
new_owner: OwnerClass = ownership_aspect.owners[0]
|
|
|
|
assert new_owner.owner == "urn:li:corpGroup:test_user"
|
|
|
|
assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL"
|
|
|
|
assert new_owner.type and new_owner.type == OwnershipTypeClass.DATA_STEWARD
|
|
|
|
|
|
|
|
new_owner = ownership_aspect.owners[1]
|
|
|
|
assert new_owner.owner == "urn:li:corpuser:alice"
|
|
|
|
assert new_owner.source and new_owner.source.type == "SOURCE_CONTROL"
|
|
|
|
assert new_owner.type and new_owner.type == OwnershipTypeClass.BUSINESS_OWNER
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_advanced_matching_tags():
|
|
|
|
raw_props = {
|
|
|
|
"case": "PLT-4567",
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs={
|
|
|
|
"case": {
|
|
|
|
"match": "^PLT-(.*)",
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "case_{{ $match }}"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_tag" in aspect_map
|
|
|
|
|
|
|
|
tag_aspect: GlobalTagsClass = aspect_map["add_tag"]
|
|
|
|
assert len(tag_aspect.tags) == 1
|
|
|
|
assert tag_aspect.tags[0].tag == "urn:li:tag:case_4567"
|
2023-09-23 05:41:42 +05:30
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_matching_nested_props():
|
|
|
|
raw_props = {
|
|
|
|
"gdpr": {
|
|
|
|
"pii": True,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs={
|
|
|
|
"gdpr.pii": {
|
|
|
|
"match": True,
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "pii"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
match_nested_props=True,
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_tag" in aspect_map
|
|
|
|
|
|
|
|
tag_aspect: GlobalTagsClass = aspect_map["add_tag"]
|
|
|
|
assert len(tag_aspect.tags) == 1
|
|
|
|
assert tag_aspect.tags[0].tag == "urn:li:tag:pii"
|
|
|
|
|
|
|
|
|
|
|
|
def test_operation_processor_matching_dot_props():
|
|
|
|
raw_props = {
|
|
|
|
"gdpr.pii": True,
|
|
|
|
}
|
|
|
|
processor = OperationProcessor(
|
|
|
|
operation_defs={
|
|
|
|
"gdpr.pii": {
|
|
|
|
"match": True,
|
|
|
|
"operation": "add_tag",
|
|
|
|
"config": {"tag": "pii"},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
owner_source_type="SOURCE_CONTROL",
|
|
|
|
match_nested_props=True,
|
|
|
|
)
|
|
|
|
aspect_map = processor.process(raw_props)
|
|
|
|
assert "add_tag" in aspect_map
|
|
|
|
|
|
|
|
tag_aspect: GlobalTagsClass = aspect_map["add_tag"]
|
|
|
|
assert len(tag_aspect.tags) == 1
|
|
|
|
assert tag_aspect.tags[0].tag == "urn:li:tag:pii"
|