mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 18:59:23 +00:00 
			
		
		
		
	
		
			
	
	
		
			35 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			35 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | import datahub.emitter.mce_builder as builder | ||
|  | import datahub.metadata.schema_classes as models | ||
|  | from datahub.emitter.mcp import MetadataChangeProposalWrapper | ||
|  | from datahub.emitter.rest_emitter import DatahubRestEmitter | ||
|  | 
 | ||
|  | # Create an emitter to DataHub over REST | ||
|  | emitter = DatahubRestEmitter(gms_server="http://localhost:8080", extra_headers={}) | ||
|  | 
 | ||
|  | dataset_urn = builder.make_dataset_urn( | ||
|  |     name="fct_users_created", platform="hive", env="PROD" | ||
|  | ) | ||
|  | primary_key_urn = builder.make_ml_primary_key_urn( | ||
|  |     feature_table_name="users_feature_table", | ||
|  |     primary_key_name="user_id", | ||
|  | ) | ||
|  | 
 | ||
|  | # Create the ML primary key | ||
|  | metadata_change_proposal = MetadataChangeProposalWrapper( | ||
|  |     entityType="mlPrimaryKey", | ||
|  |     changeType=models.ChangeTypeClass.UPSERT, | ||
|  |     entityUrn=primary_key_urn, | ||
|  |     aspectName="mlPrimaryKeyProperties", | ||
|  |     aspect=models.MLPrimaryKeyPropertiesClass( | ||
|  |         description="Represents the id of the user the other features relate to.", | ||
|  |         # Attaching a source to an ML primary key creates lineage between the key | ||
|  |         # and the upstream dataset. This is how lineage between your data warehouse | ||
|  |         # and machine learning ecosystem is established. | ||
|  |         sources=[dataset_urn], | ||
|  |         dataType="TEXT", | ||
|  |     ), | ||
|  | ) | ||
|  | 
 | ||
|  | # Emit metadata! | ||
|  | emitter.emit(metadata_change_proposal) |