# Mirror of https://github.com/datahub-project/datahub.git
# Synced 2025-07-08 09:41:19 +00:00
# 70 lines, 3.0 KiB, Python
# Imports for urn construction utility methods
from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.rest_emitter import DatahubRestEmitter

# Imports for metadata model classes
from datahub.metadata.schema_classes import (
    AuditStampClass,
    ChangeTypeClass,
    DateTypeClass,
    OtherSchemaClass,
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    SchemaMetadataClass,
    StringTypeClass,
)

# Build the metadata change proposal that upserts the "schemaMetadata" aspect
# of the hive dataset "realestate_db.sales" (PROD). The schema declares three
# fields: two string columns nested under "address" and one date column.
# NOTE(review): the AuditStampClass `time` values look like epoch
# milliseconds -- confirm against the DataHub AuditStamp model.
event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityType="dataset",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD"),
    aspectName="schemaMetadata",
    aspect=SchemaMetadataClass(
        schemaName="customer",  # not used
        platform=make_data_platform_urn("hive"),  # important <- platform must be an urn
        version=0,  # when the source system has a notion of versioning of schemas, insert this in, otherwise leave as 0
        hash="",  # when the source system has a notion of unique schemas identified via hash, include a hash, else leave it as empty string
        platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"),
        lastModified=AuditStampClass(
            time=1640692800000, actor="urn:li:corpuser:ingestion"
        ),
        fields=[
            SchemaFieldClass(
                fieldPath="address.zipcode",
                type=SchemaFieldDataTypeClass(type=StringTypeClass()),
                nativeDataType="VARCHAR(50)",  # use this to provide the type of the field in the source system's vernacular
                description="This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
                lastModified=AuditStampClass(
                    time=1640692800000, actor="urn:li:corpuser:ingestion"
                ),
            ),
            SchemaFieldClass(
                fieldPath="address.street",
                type=SchemaFieldDataTypeClass(type=StringTypeClass()),
                nativeDataType="VARCHAR(100)",
                description="Street corresponding to the address",
                lastModified=AuditStampClass(
                    time=1640692800000, actor="urn:li:corpuser:ingestion"
                ),
            ),
            # Unlike the two address fields, this one also records a
            # `created` audit stamp alongside `lastModified`.
            SchemaFieldClass(
                fieldPath="last_sold_date",
                type=SchemaFieldDataTypeClass(type=DateTypeClass()),
                nativeDataType="Date",
                description="Date of the last sale date for this property",
                created=AuditStampClass(
                    time=1640692800000, actor="urn:li:corpuser:ingestion"
                ),
                lastModified=AuditStampClass(
                    time=1640692800000, actor="urn:li:corpuser:ingestion"
                ),
            ),
        ],
    ),
)

# Create the REST emitter targeting the GMS server at localhost:8080 and
# send the schema metadata proposal built above in `event`.
rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
rest_emitter.emit(event)