from typing import List, Optional, Union from datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn def add_terms_to_dataset( client: DataHubClient, dataset_urn: DatasetUrn, term_urns: List[Union[GlossaryTermUrn, str]], ) -> None: """ Add glossary terms to a dataset. Args: client: DataHub client to use dataset_urn: URN of the dataset to update term_urns: List of term URNs or term names to add """ dataset = client.entities.get(dataset_urn) for term in term_urns: if isinstance(term, str): resolved_term_urn = client.resolve.term(name=term) dataset.add_term(resolved_term_urn) else: dataset.add_term(term) client.entities.update(dataset) def main(client: Optional[DataHubClient] = None) -> None: """ Main function to add terms to dataset example. Args: client: Optional DataHub client (for testing). If not provided, creates one from env. """ client = client or DataHubClient.from_env() dataset_urn = DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD") # Add terms using both URN and name resolution add_terms_to_dataset( client=client, dataset_urn=dataset_urn, term_urns=[ GlossaryTermUrn("Classification.HighlyConfidential"), "PII", # Will be resolved by name ], ) if __name__ == "__main__": main()