2025-10-23 14:01:45 -07:00
|
|
|
from typing import List, Optional, Union
|
|
|
|
|
|
2025-03-21 16:00:05 -07:00
|
|
|
from datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn
|
2022-02-18 09:45:45 -08:00
|
|
|
|
|
|
|
|
|
2025-10-23 14:01:45 -07:00
|
|
|
def add_terms_to_dataset(
    client: DataHubClient,
    dataset_urn: DatasetUrn,
    term_urns: List[Union[GlossaryTermUrn, str]],
) -> None:
    """
    Attach glossary terms to a dataset and send the update through the client.

    Args:
        client: DataHub client used to fetch the dataset, resolve term
            names, and submit the update.
        dataset_urn: URN of the dataset to update.
        term_urns: Terms to attach. Entries that are plain strings are
            treated as term names and resolved with ``client.resolve.term``;
            all other entries are passed to ``add_term`` unchanged.
    """
    entity = client.entities.get(dataset_urn)

    for entry in term_urns:
        # A plain string is a term name: look it up before attaching it.
        term_ref = (
            client.resolve.term(name=entry) if isinstance(entry, str) else entry
        )
        entity.add_term(term_ref)

    # Submit the modified dataset once, after every term has been added.
    client.entities.update(entity)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(client: Optional[DataHubClient] = None) -> None:
    """
    Run the add-terms-to-dataset example.

    Args:
        client: Optional DataHub client (for testing). When it is omitted
            (or falsy), one is created with ``DataHubClient.from_env()``.
    """
    if not client:
        client = DataHubClient.from_env()

    target_dataset = DatasetUrn(
        platform="hive", name="realestate_db.sales", env="PROD"
    )

    # Mix an explicit URN with a plain name to exercise both code paths.
    example_terms: List[Union[GlossaryTermUrn, str]] = [
        GlossaryTermUrn("Classification.HighlyConfidential"),
        "PII",  # Will be resolved by name
    ]

    add_terms_to_dataset(
        client=client,
        dataset_urn=target_dataset,
        term_urns=example_terms,
    )
|
2022-02-18 09:45:45 -08:00
|
|
|
|
2025-08-21 10:22:08 -07:00
|
|
|
|
2025-10-23 14:01:45 -07:00
|
|
|
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|