mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-24 23:48:23 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			70 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Imports for urn construction utility methods
 | |
| from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
 | |
| from datahub.emitter.mcp import MetadataChangeProposalWrapper
 | |
| from datahub.emitter.rest_emitter import DatahubRestEmitter
 | |
| 
 | |
| # Imports for metadata model classes
 | |
| from datahub.metadata.schema_classes import (
 | |
|     AuditStampClass,
 | |
|     ChangeTypeClass,
 | |
|     DateTypeClass,
 | |
|     OtherSchemaClass,
 | |
|     SchemaFieldClass,
 | |
|     SchemaFieldDataTypeClass,
 | |
|     SchemaMetadataClass,
 | |
|     StringTypeClass,
 | |
| )
 | |
| 
 | |
# Values reused by every audit stamp in this example.
_STAMP_TIME = 1640692800000
_STAMP_ACTOR = "urn:li:corpuser:ingestion"


def _audit_stamp() -> AuditStampClass:
    """Return a new audit stamp carrying the example's fixed time and actor."""
    return AuditStampClass(time=_STAMP_TIME, actor=_STAMP_ACTOR)


# Field definitions, listed in the order they appear in the emitted schema.
_zipcode_field = SchemaFieldClass(
    fieldPath="address.zipcode",
    type=SchemaFieldDataTypeClass(type=StringTypeClass()),
    # nativeDataType carries the field type as the source system spells it.
    nativeDataType="VARCHAR(50)",
    description="This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
    lastModified=_audit_stamp(),
)

_street_field = SchemaFieldClass(
    fieldPath="address.street",
    type=SchemaFieldDataTypeClass(type=StringTypeClass()),
    nativeDataType="VARCHAR(100)",
    description="Street corresponding to the address",
    lastModified=_audit_stamp(),
)

# Unlike the two address fields, this one also records a creation stamp.
_last_sold_field = SchemaFieldClass(
    fieldPath="last_sold_date",
    type=SchemaFieldDataTypeClass(type=DateTypeClass()),
    nativeDataType="Date",
    description="Date of the last sale date for this property",
    created=_audit_stamp(),
    lastModified=_audit_stamp(),
)

# The schemaMetadata aspect describing the dataset's columns.
_schema_aspect = SchemaMetadataClass(
    # schemaName is not used.
    schemaName="customer",
    # Important: platform must be an urn, not a bare platform name.
    platform=make_data_platform_urn("hive"),
    # Set version when the source system versions its schemas; otherwise
    # leave it as 0.
    version=0,
    # Set hash when the source system identifies unique schemas by hash;
    # otherwise leave it as an empty string.
    hash="",
    platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"),
    lastModified=_audit_stamp(),
    fields=[_zipcode_field, _street_field, _last_sold_field],
)

# Wrap the aspect in an UPSERT change proposal targeting the dataset.
event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
    entityType="dataset",
    changeType=ChangeTypeClass.UPSERT,
    entityUrn=make_dataset_urn(
        platform="hive",
        name="realestate_db.sales",
        env="PROD",
    ),
    aspectName="schemaMetadata",
    aspect=_schema_aspect,
)

# Create the REST emitter and send the proposal to the configured server.
rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
rest_emitter.emit(event)
 | 
