datahub/smoke-test/tests/patch/common_patch_tests.py

412 lines
14 KiB
Python

import time
import uuid
from typing import Dict, Optional, Type
from datahub.emitter.mce_builder import (
make_dataset_urn,
make_schema_field_urn,
make_tag_urn,
make_term_urn,
make_user_urn,
)
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_patch_builder import MetadataPatchProposal
from datahub.ingestion.graph.client import DataHubGraph
from datahub.metadata.schema_classes import (
AuditStampClass,
FineGrainedLineageClass,
FineGrainedLineageDownstreamTypeClass,
FineGrainedLineageUpstreamTypeClass,
GlobalTagsClass,
GlossaryTermAssociationClass,
GlossaryTermsClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
TagAssociationClass,
_Aspect,
)
from datahub.specific.aspect_helpers.fine_grained_lineage import (
HasFineGrainedLineagePatch,
)
def helper_test_entity_terms_patch(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    patch_builder_class: Type[MetadataPatchProposal],
):
    """Exercise ``add_term`` / ``remove_term`` patches on an entity.

    Seeds the entity's ``GlossaryTermsClass`` aspect with one term, patches in
    a second term, then patches out the first, reading the aspect back through
    ``graph_client`` after every step.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity whose glossary terms are patched.
        patch_builder_class: Patch builder type under test; it must expose
            ``add_term`` and ``remove_term`` (asserted at runtime).

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected terms.
    """

    def get_terms(entity_urn: str) -> GlossaryTermsClass:
        terms = graph_client.get_aspect(
            entity_urn=entity_urn,
            aspect_type=GlossaryTermsClass,
        )
        # Same guard the tag/ownership helpers apply after each read: a
        # missing aspect should fail as an assertion, not as an
        # AttributeError on ``None.terms``.
        assert terms is not None
        return terms

    # Seed the aspect with a single, uniquely named term.
    term_urn = make_term_urn(term=f"testTerm-{uuid.uuid4()}")
    term_association = GlossaryTermAssociationClass(urn=term_urn, context="test")
    global_terms = GlossaryTermsClass(
        terms=[term_association],
        auditStamp=AuditStampClass(
            time=int(time.time() * 1000.0), actor=make_user_urn("tester")
        ),
    )
    mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_terms)
    graph_client.emit_mcp(mcpw)

    terms_read = get_terms(test_entity_urn)
    assert terms_read.terms[0].urn == term_urn
    assert terms_read.terms[0].context == "test"

    # Add a second term via patch; the seeded term must stay first.
    new_term = GlossaryTermAssociationClass(urn=make_term_urn(f"test-{uuid.uuid4()}"))
    patch_builder = patch_builder_class(test_entity_urn)
    assert hasattr(patch_builder, "add_term")
    for patch_mcp in patch_builder.add_term(new_term).build():
        graph_client.emit_mcp(patch_mcp)

    terms_read = get_terms(test_entity_urn)
    assert terms_read.terms[0].urn == term_urn
    assert terms_read.terms[0].context == "test"
    assert terms_read.terms[1].urn == new_term.urn
    assert terms_read.terms[1].context is None

    # Remove the seeded term via patch; only the patched-in term remains.
    patch_builder = patch_builder_class(test_entity_urn)
    assert hasattr(patch_builder, "remove_term")
    for patch_mcp in patch_builder.remove_term(term_urn).build():
        graph_client.emit_mcp(patch_mcp)

    terms_read = get_terms(test_entity_urn)
    assert len(terms_read.terms) == 1
    assert terms_read.terms[0].urn == new_term.urn
def helper_test_dataset_tags_patch(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    patch_builder_class: Type[MetadataPatchProposal],
):
    """Exercise ``add_tag`` / ``remove_tag`` patches on an entity.

    Writes a ``GlobalTagsClass`` aspect holding one tag, patches in a second
    tag, then patches out the first, checking the aspect read back through
    ``graph_client`` after each step.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity whose tags are patched.
        patch_builder_class: Patch builder type under test; it must expose
            ``add_tag`` and ``remove_tag`` (asserted at runtime).

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected tags.
    """

    def read_tags() -> GlobalTagsClass:
        # Fetch the current tags aspect and require that it exists.
        current = graph_client.get_aspect(
            entity_urn=test_entity_urn,
            aspect_type=GlobalTagsClass,
        )
        assert current is not None
        return current

    # Step 1: seed the aspect with one uniquely named tag.
    seed_urn = make_tag_urn(tag=f"testTag-{uuid.uuid4()}")
    seed_aspect = GlobalTagsClass(
        tags=[TagAssociationClass(tag=seed_urn, context="test")]
    )
    graph_client.emit_mcp(
        MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=seed_aspect)
    )

    snapshot = read_tags()
    assert snapshot.tags[0].tag == seed_urn
    assert snapshot.tags[0].context == "test"

    # Step 2: patch in a second tag; the seeded one must remain first.
    extra_tag = TagAssociationClass(tag=make_tag_urn(f"test-{uuid.uuid4()}"))
    adder = patch_builder_class(test_entity_urn)
    assert hasattr(adder, "add_tag")
    for proposal in adder.add_tag(extra_tag).build():
        graph_client.emit_mcp(proposal)

    snapshot = read_tags()
    assert snapshot.tags[0].tag == seed_urn
    assert snapshot.tags[0].context == "test"
    assert snapshot.tags[1].tag == extra_tag.tag
    assert snapshot.tags[1].context is None

    # Step 3: patch out the seeded tag; only the patched-in tag is left.
    remover = patch_builder_class(test_entity_urn)
    assert hasattr(remover, "remove_tag")
    for proposal in remover.remove_tag(seed_urn).build():
        graph_client.emit_mcp(proposal)

    snapshot = read_tags()
    assert len(snapshot.tags) == 1
    assert snapshot.tags[0].tag == extra_tag.tag
def helper_test_ownership_patch(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    patch_builder_class: Type[MetadataPatchProposal],
):
    """Exercise ``add_owner`` / ``remove_owner`` patches on an entity.

    Writes an ``OwnershipClass`` aspect with the single owner ``jdoe``,
    patches in ``gdoe``, then patches ``gdoe`` out again, checking the aspect
    read back through ``graph_client`` after each step.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity whose ownership is patched.
        patch_builder_class: Patch builder type under test; it must expose
            ``add_owner`` and ``remove_owner`` (asserted at runtime).

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected owners.
    """

    def read_ownership() -> OwnershipClass:
        # Fetch the current ownership aspect and require that it exists.
        current = graph_client.get_aspect(
            entity_urn=test_entity_urn, aspect_type=OwnershipClass
        )
        assert current is not None
        return current

    initial_owner = OwnerClass(
        owner=make_user_urn("jdoe"), type=OwnershipTypeClass.DATAOWNER
    )
    extra_owner = OwnerClass(
        owner=make_user_urn("gdoe"), type=OwnershipTypeClass.DATAOWNER
    )

    # Step 1: full write of the aspect with only the initial owner.
    graph_client.emit_mcp(
        MetadataChangeProposalWrapper(
            entityUrn=test_entity_urn,
            aspect=OwnershipClass(owners=[initial_owner]),
        )
    )
    ownership = read_ownership()
    assert ownership.owners[0].owner == make_user_urn("jdoe")

    # Step 2: patch in the second owner.
    adder = patch_builder_class(test_entity_urn)
    assert hasattr(adder, "add_owner")
    for proposal in adder.add_owner(extra_owner).build():
        graph_client.emit_mcp(proposal)
    ownership = read_ownership()
    assert len(ownership.owners) == 2

    # Step 3: patch the second owner back out; the initial owner must remain.
    remover = patch_builder_class(test_entity_urn)
    assert hasattr(remover, "remove_owner")
    for proposal in remover.remove_owner(make_user_urn("gdoe")).build():
        graph_client.emit_mcp(proposal)
    ownership = read_ownership()
    assert len(ownership.owners) == 1
    assert ownership.owners[0].owner == make_user_urn("jdoe")
def helper_test_custom_properties_patch(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    patch_builder_class: Type[MetadataPatchProposal],
    custom_properties_aspect_class: Type[_Aspect],
    base_aspect: _Aspect,
):
    """Exercise add / remove / set custom-property patches on an entity.

    Emits ``base_aspect`` with a known ``customProperties`` map, then applies
    ``add_custom_property``, ``remove_custom_property`` and
    ``set_custom_properties`` patches in turn, reading the aspect back through
    ``graph_client`` after each one.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity whose custom properties are patched.
        patch_builder_class: Patch builder type under test; it must expose
            ``add_custom_property``, ``remove_custom_property`` and
            ``set_custom_properties`` (asserted at runtime).
        custom_properties_aspect_class: Aspect type that carries the
            ``customProperties`` attribute.
        base_aspect: Aspect instance to emit first. Its ``customProperties``
            attribute is overwritten in place by this helper.

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected properties.
    """

    def get_custom_properties(entity_urn: str) -> Optional[Dict[str, str]]:
        fetched = graph_client.get_aspect(
            entity_urn=entity_urn,
            aspect_type=custom_properties_aspect_class,
        )
        assert fetched
        assert hasattr(fetched, "customProperties")
        return fetched.customProperties

    def assert_has_entries(actual: Dict[str, str], expected: Dict[str, str]) -> None:
        # Every expected key must be present with exactly the expected value.
        for key, value in expected.items():
            assert actual[key] == value

    seeded_props = {"base_property": "base_property_value"}
    added_props = {
        "test_property": "test_value",
        "test_property1": "test_value1",
    }

    # Step 1: full write of the aspect carrying only the seeded properties.
    assert hasattr(base_aspect, "customProperties")
    base_aspect.customProperties = seeded_props
    graph_client.emit(
        MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=base_aspect)
    )

    props = get_custom_properties(test_entity_urn)
    assert props
    assert_has_entries(props, seeded_props)

    # Step 2: add properties one at a time on a single builder.
    adder = patch_builder_class(test_entity_urn)
    assert hasattr(adder, "add_custom_property")
    for key, value in added_props.items():
        adder.add_custom_property(key, value)
    for proposal in adder.build():
        graph_client.emit_mcp(proposal)

    props = get_custom_properties(test_entity_urn)
    assert props is not None
    assert_has_entries(props, added_props)
    # Existing properties must not have been touched.
    assert_has_entries(props, seeded_props)

    # Step 3: remove one of the added properties.
    remover = patch_builder_class(test_entity_urn)
    assert hasattr(remover, "remove_custom_property")
    for proposal in remover.remove_custom_property("test_property").build():
        graph_client.emit_mcp(proposal)

    props = get_custom_properties(test_entity_urn)
    assert props is not None
    assert "test_property" not in props
    assert props["test_property1"] == "test_value1"
    # Existing properties must not have been touched.
    assert_has_entries(props, seeded_props)

    # Step 4: replace the whole map; seeded keys must disappear.
    replacer = patch_builder_class(test_entity_urn)
    assert hasattr(replacer, "set_custom_properties")
    for proposal in replacer.set_custom_properties(added_props).build():
        graph_client.emit_mcp(proposal)

    props = get_custom_properties(test_entity_urn)
    assert props is not None
    for key in seeded_props:
        assert key not in props
    assert_has_entries(props, added_props)

    # Finally, compare each instance attribute of the emitted aspect object
    # with the same attribute on the aspect read back.
    stored_aspect: Optional[_Aspect] = graph_client.get_aspect(
        test_entity_urn, custom_properties_aspect_class
    )
    assert stored_aspect
    for attr_name, attr_value in base_aspect.__dict__.items():
        assert stored_aspect.__dict__[attr_name] == attr_value
def helper_test_add_fine_grained_lineage(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    aspect_type: Type[_Aspect],
    patch_builder_class: Type[HasFineGrainedLineagePatch],
):
    """Test that add_fine_grained_lineage works correctly.

    Patches a single field-level lineage onto the entity, verifies it on the
    aspect read back through ``graph_client``, then removes it again with a
    separate patch and verifies that no fine-grained lineage remains.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity that receives the lineage patch.
        aspect_type: Aspect type to read back; the returned aspect is
            expected to expose ``fineGrainedLineages``.
        patch_builder_class: Patch builder type under test.

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected lineage state.
    """

    def get_lineage_aspect() -> Optional[_Aspect]:
        return graph_client.get_aspect(
            entity_urn=test_entity_urn,
            aspect_type=aspect_type,
        )

    # Create test datasets and fields (unique names per invocation).
    source_dataset_urn = make_dataset_urn(
        platform="postgres", name=f"source_table_{uuid.uuid4()}", env="PROD"
    )
    target_dataset_urn = make_dataset_urn(
        platform="postgres", name=f"target_table_{uuid.uuid4()}", env="PROD"
    )
    source_field_urn = make_schema_field_urn(source_dataset_urn, "user_id")
    target_field_urn = make_schema_field_urn(target_dataset_urn, "user_id")

    fine_grained_lineage = FineGrainedLineageClass(
        upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
        upstreams=[source_field_urn],
        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
        downstreams=[target_field_urn],
        transformOperation="OPERATION",
        confidenceScore=1.0,
    )

    # Test that add_fine_grained_lineage works
    patch_builder = patch_builder_class(test_entity_urn)
    patch_builder.add_fine_grained_lineage(fine_grained_lineage)
    for patch_mcp in patch_builder.build():
        graph_client.emit_mcp(patch_mcp)

    # Verify the lineage was applied
    lineage_aspect = get_lineage_aspect()
    assert lineage_aspect is not None
    assert lineage_aspect.fineGrainedLineages is not None
    assert len(lineage_aspect.fineGrainedLineages) == 1
    stored_lineage = lineage_aspect.fineGrainedLineages[0]
    assert stored_lineage.transformOperation == "OPERATION"
    assert stored_lineage.confidenceScore == 1.0
    assert stored_lineage.upstreams == [source_field_urn]
    assert stored_lineage.downstreams == [target_field_urn]

    # Cleanup: remove the fine-grained lineage with a fresh builder, as the
    # other helpers in this module do for each step. Reusing the builder above
    # would presumably replay its already-recorded add operation before the
    # remove, so the removal would not be exercised on its own.
    cleanup_builder = patch_builder_class(test_entity_urn)
    cleanup_builder.remove_fine_grained_lineage(fine_grained_lineage)
    for patch_mcp in cleanup_builder.build():
        graph_client.emit_mcp(patch_mcp)

    # Verify cleanup worked
    lineage_aspect = get_lineage_aspect()
    assert lineage_aspect is not None
    assert (
        lineage_aspect.fineGrainedLineages is None
        or len(lineage_aspect.fineGrainedLineages) == 0
    )
def helper_test_set_fine_grained_lineages(
    graph_client: DataHubGraph,
    test_entity_urn: str,
    aspect_type: Type[_Aspect],
    patch_builder_class: Type[HasFineGrainedLineagePatch],
):
    """Test setting fine-grained lineages.

    Uses ``set_fine_grained_lineages`` to install a single field-level lineage,
    verifies it on the aspect read back through ``graph_client``, then sets an
    empty list with a separate patch and verifies that no fine-grained lineage
    remains.

    Args:
        graph_client: Client used to emit proposals and read aspects back.
        test_entity_urn: URN of the entity that receives the lineage patch.
        aspect_type: Aspect type to read back; the returned aspect is
            expected to expose ``fineGrainedLineages``.
        patch_builder_class: Patch builder type under test.

    Raises:
        AssertionError: If a read-back is missing or does not match the
            expected lineage state.
    """

    def get_lineage_aspect() -> Optional[_Aspect]:
        return graph_client.get_aspect(
            entity_urn=test_entity_urn,
            aspect_type=aspect_type,
        )

    # Create test datasets and fields (unique names per invocation).
    source_dataset_urn = make_dataset_urn(
        platform="postgres", name=f"source_table_{uuid.uuid4()}", env="PROD"
    )
    target_dataset_urn = make_dataset_urn(
        platform="postgres", name=f"target_table_{uuid.uuid4()}", env="PROD"
    )
    source_field_urn = make_schema_field_urn(source_dataset_urn, "user_id")
    target_field_urn = make_schema_field_urn(target_dataset_urn, "user_id")

    # Create fine-grained lineage
    fine_grained_lineage = FineGrainedLineageClass(
        upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
        upstreams=[source_field_urn],
        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
        downstreams=[target_field_urn],
        transformOperation="OPERATION",
        confidenceScore=1.0,
    )

    # Apply the fine-grained lineage patch using set method
    patch_builder = patch_builder_class(test_entity_urn)
    patch_builder.set_fine_grained_lineages([fine_grained_lineage])
    for patch_mcp in patch_builder.build():
        graph_client.emit_mcp(patch_mcp)

    # Verify the lineage was applied
    lineage_aspect = get_lineage_aspect()
    assert lineage_aspect is not None
    assert lineage_aspect.fineGrainedLineages is not None
    assert len(lineage_aspect.fineGrainedLineages) == 1
    stored_lineage = lineage_aspect.fineGrainedLineages[0]
    assert stored_lineage.transformOperation == "OPERATION"
    assert stored_lineage.confidenceScore == 1.0
    assert stored_lineage.upstreams == [source_field_urn]
    assert stored_lineage.downstreams == [target_field_urn]

    # Cleanup: clear all lineages with a fresh builder, as the other helpers
    # in this module do for each step. Reusing the builder above would
    # presumably replay its already-recorded operations before the clearing
    # one, so the empty set would not be exercised on its own.
    cleanup_builder = patch_builder_class(test_entity_urn)
    cleanup_builder.set_fine_grained_lineages([])
    for patch_mcp in cleanup_builder.build():
        graph_client.emit_mcp(patch_mcp)

    # Verify cleanup worked
    lineage_aspect = get_lineage_aspect()
    assert lineage_aspect is not None
    assert (
        lineage_aspect.fineGrainedLineages is None
        or len(lineage_aspect.fineGrainedLineages) == 0
    )