mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-12 17:34:18 +00:00
55 lines
1.4 KiB
Python
55 lines
1.4 KiB
Python
from typing import List, Optional, Union
|
|
|
|
from datahub.sdk import DataHubClient, DatasetUrn, GlossaryTermUrn
|
|
|
|
|
|
def add_terms_to_dataset(
    client: DataHubClient,
    dataset_urn: DatasetUrn,
    term_urns: List[Union[GlossaryTermUrn, str]],
) -> None:
    """
    Attach a batch of glossary terms to one dataset.

    The dataset is fetched through ``client.entities.get``, every requested
    term is added to it, and the modified entity is handed back to
    ``client.entities.update`` once, after all terms have been added.

    Args:
        client: DataHub client used to fetch, resolve and update.
        dataset_urn: URN of the dataset that receives the terms.
        term_urns: Terms to add. ``str`` entries are treated as term names
            and looked up with ``client.resolve.term(name=...)``; any other
            entry is passed to ``add_term`` unchanged.
    """
    entity = client.entities.get(dataset_urn)

    for entry in term_urns:
        # Only plain strings go through name resolution; everything else is
        # assumed to already be usable by add_term as-is.
        term_ref = client.resolve.term(name=entry) if isinstance(entry, str) else entry
        entity.add_term(term_ref)

    client.entities.update(entity)
|
|
|
|
|
|
def main(client: Optional[DataHubClient] = None) -> None:
    """
    Run the "add glossary terms to a dataset" example end to end.

    Args:
        client: DataHub client to use (e.g. injected by tests). When omitted
            or falsy, one is built with ``DataHubClient.from_env()``.
    """
    if not client:
        client = DataHubClient.from_env()

    sales_dataset = DatasetUrn(platform="hive", name="realestate_db.sales", env="PROD")

    # Mix both accepted forms: an explicit term URN and a bare term name.
    example_terms: List[Union[GlossaryTermUrn, str]] = [
        GlossaryTermUrn("Classification.HighlyConfidential"),
        "PII",  # Will be resolved by name
    ]

    add_terms_to_dataset(
        client=client,
        dataset_urn=sales_dataset,
        term_urns=example_terms,
    )
|
|
|
|
|
|
# Run the example only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
|