55 lines
1.4 KiB
Python

from typing import List, Optional, Union
from datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn
def add_terms_to_dataset(
    client: DataHubClient,
    dataset_urn: DatasetUrn,
    term_urns: List[Union[GlossaryTermUrn, str]],
) -> None:
    """Add glossary terms to a dataset.

    The dataset is fetched once, every term is attached to the fetched
    entity, and the entity is written back with a single update call.

    Args:
        client: DataHub client to use.
        dataset_urn: URN of the dataset to update.
        term_urns: Terms to add. Each item is either a ``GlossaryTermUrn``,
            a string holding a glossary term URN
            (``urn:li:glossaryTerm:...``), or a plain term name, which is
            resolved to a URN through ``client.resolve.term``.
    """
    urn_prefix = "urn:li:glossaryTerm:"

    dataset = client.entities.get(dataset_urn)

    for term in term_urns:
        if not isinstance(term, str):
            # Already a URN object; use it as-is.
            term_urn = term
        elif term.startswith(urn_prefix):
            # A URN given in string form: parse it rather than sending it to
            # the name lookup, which would search for a term whose *name* is
            # the URN text.
            term_urn = GlossaryTermUrn.from_string(term)
        else:
            # Plain term name: ask the server for the matching URN.
            term_urn = client.resolve.term(name=term)
        dataset.add_term(term_urn)

    # Persist all added terms in one update.
    client.entities.update(dataset)
def main(client: Optional[DataHubClient] = None) -> None:
    """Run the add-terms-to-dataset example.

    Args:
        client: Optional DataHub client (for testing). If not provided,
            one is created from the environment with
            ``DataHubClient.from_env()``.
    """
    # Explicit None check: only a missing client triggers the env fallback,
    # never a caller-supplied object that happens to evaluate as falsy.
    if client is None:
        client = DataHubClient.from_env()

    dataset_urn = DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD")

    # Add terms using both URN and name resolution
    add_terms_to_dataset(
        client=client,
        dataset_urn=dataset_urn,
        term_urns=[
            GlossaryTermUrn("Classification.HighlyConfidential"),
            "PII",  # Will be resolved by name
        ],
    )
if __name__ == "__main__":
    # Run the example only when executed as a script, not when imported.
    main()