mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-12 17:34:18 +00:00
88 lines
3.0 KiB
Python
88 lines
3.0 KiB
Python
import os
|
|
|
|
from datahub.emitter.mce_builder import make_term_urn
|
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
from datahub.metadata.schema_classes import (
|
|
AuditStampClass,
|
|
GlossaryTermInfoClass,
|
|
InstitutionalMemoryClass,
|
|
InstitutionalMemoryMetadataClass,
|
|
OwnerClass,
|
|
OwnershipClass,
|
|
OwnershipSourceClass,
|
|
OwnershipSourceTypeClass,
|
|
OwnershipTypeClass,
|
|
)
|
|
from datahub.metadata.urns import GlossaryNodeUrn
|
|
|
|
# Create the term URN
|
|
term_urn = make_term_urn("Classification.PII")
|
|
|
|
# Create GlossaryTermInfo with full metadata
|
|
term_info = GlossaryTermInfoClass(
|
|
name="Personally Identifiable Information",
|
|
definition="Information that can be used to identify, contact, or locate a single person, or to identify an individual in context. Examples include name, email address, phone number, and social security number.",
|
|
termSource="INTERNAL",
|
|
# Link to a parent term group (glossary node)
|
|
parentNode=str(GlossaryNodeUrn("Classification")),
|
|
# Custom properties for additional metadata
|
|
customProperties={
|
|
"sensitivity_level": "HIGH",
|
|
"data_retention_period": "7_years",
|
|
"regulatory_framework": "GDPR,CCPA",
|
|
},
|
|
)
|
|
|
|
# Add ownership information
|
|
ownership = OwnershipClass(
|
|
owners=[
|
|
OwnerClass(
|
|
owner="urn:li:corpuser:datahub",
|
|
type=OwnershipTypeClass.DATAOWNER,
|
|
source=OwnershipSourceClass(type=OwnershipSourceTypeClass.MANUAL),
|
|
),
|
|
OwnerClass(
|
|
owner="urn:li:corpGroup:privacy-team",
|
|
type=OwnershipTypeClass.DATAOWNER,
|
|
source=OwnershipSourceClass(type=OwnershipSourceTypeClass.MANUAL),
|
|
),
|
|
]
|
|
)
|
|
|
|
# Add links to related documentation
|
|
institutional_memory = InstitutionalMemoryClass(
|
|
elements=[
|
|
InstitutionalMemoryMetadataClass(
|
|
url="https://wiki.company.com/privacy/pii-guidelines",
|
|
description="Internal PII Handling Guidelines",
|
|
createStamp=AuditStampClass(time=0, actor="urn:li:corpuser:datahub"),
|
|
),
|
|
InstitutionalMemoryMetadataClass(
|
|
url="https://gdpr.eu/",
|
|
description="GDPR Official Documentation",
|
|
createStamp=AuditStampClass(time=0, actor="urn:li:corpuser:datahub"),
|
|
),
|
|
]
|
|
)
|
|
|
|
# Emit all aspects for the glossary term
|
|
# Get DataHub connection details from environment
|
|
gms_server = os.getenv("DATAHUB_GMS_URL", "http://localhost:8080")
|
|
token = os.getenv("DATAHUB_GMS_TOKEN")
|
|
|
|
rest_emitter = DatahubRestEmitter(gms_server=gms_server, token=token)
|
|
|
|
# Emit term info
|
|
rest_emitter.emit(MetadataChangeProposalWrapper(entityUrn=term_urn, aspect=term_info))
|
|
|
|
# Emit ownership
|
|
rest_emitter.emit(MetadataChangeProposalWrapper(entityUrn=term_urn, aspect=ownership))
|
|
|
|
# Emit institutional memory (documentation links)
|
|
rest_emitter.emit(
|
|
MetadataChangeProposalWrapper(entityUrn=term_urn, aspect=institutional_memory)
|
|
)
|
|
|
|
print(f"Created glossary term with full metadata: {term_urn}")
|