2025-03-21 16:00:05 -07:00
|
|
|
from datahub.sdk import DataHubClient, Dataset
|
2022-02-18 09:45:45 -08:00
|
|
|
|
2025-03-21 16:00:05 -07:00
|
|
|
# Obtain a client through the SDK's from_env() constructor.
client = DataHubClient.from_env()

# Column definitions for the dataset, each written as a tuple of
# (field name / field path, data type, description).
sales_columns = [
    (
        "address.zipcode",
        "varchar(50)",
        "This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
    ),
    (
        "address.street",
        "varchar(100)",
        "Street corresponding to the address",
    ),
    (
        "last_sold_date",
        "date",
        "Date of the last sale date for this property",
    ),
]

# Dataset on the "hive" platform named "realestate_db.sales", carrying the
# column definitions declared above as its schema.
dataset = Dataset(
    platform="hive",
    name="realestate_db.sales",
    schema=sales_columns,
)

# Hand the dataset to the client's entity upsert call.
client.entities.upsert(dataset)
|