2025-03-21 16:00:05 -07:00
|
|
|
from datahub.sdk import DataHubClient, DatasetUrn
|
2022-02-18 09:45:45 -08:00
|
|
|
|
2025-03-21 16:00:05 -07:00
|
|
|
# Build the SDK client via its from_env() constructor.
client = DataHubClient.from_env()

# Look up the dataset entity that the rest of this script annotates.
dataset = client.entities.get(
    DatasetUrn(
        platform="hive",
        name="realestate_db.sales",
    )
)

# Add dataset documentation: a "##" heading line followed by one paragraph.
documentation = (
    "## The Real Estate Sales Dataset\n"
    "This is a really important Dataset that contains all the relevant "
    "information about sales that have happened organized by address.\n"
)
dataset.set_description(documentation)

# Add link to institutional memory. The link is passed as a single tuple:
# the URL first, then the link description.
dataset.add_link(
    (
        "https://wikipedia.com/real_estate",
        "This is the definition of what real estate means",
    )
)

# Hand the modified entity back to the client so the description and link
# set above are submitted together.
client.entities.update(dataset)
|