mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 18:59:23 +00:00 
			
		
		
		
	 0d5873db2a
			
		
	
	
		0d5873db2a
		
			
		
	
	
	
	
		
			
			Co-authored-by: Shirshanka Das <shirshanka@apache.org> Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> Co-authored-by: David Leifker <david.leifker@acryl.io>
		
			
				
	
	
		
			51 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			51 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import logging
 | |
| from typing import Union
 | |
| 
 | |
| from datahub.configuration.kafka import KafkaProducerConnectionConfig
 | |
| from datahub.emitter.kafka_emitter import DatahubKafkaEmitter, KafkaEmitterConfig
 | |
| from datahub.emitter.mce_builder import make_dataset_urn
 | |
| from datahub.emitter.rest_emitter import DataHubRestEmitter
 | |
| from datahub.specific.dataset import DatasetPatchBuilder
 | |
| 
 | |
| log = logging.getLogger(__name__)
 | |
| logging.basicConfig(level=logging.INFO)
 | |
| 
 | |
| 
 | |
| # Get an emitter, either REST or Kafka, this example shows you both
 | |
| def get_emitter() -> Union[DataHubRestEmitter, DatahubKafkaEmitter]:
 | |
|     USE_REST_EMITTER = True
 | |
|     if USE_REST_EMITTER:
 | |
|         gms_endpoint = "http://localhost:8080"
 | |
|         return DataHubRestEmitter(gms_server=gms_endpoint)
 | |
|     else:
 | |
|         kafka_server = "localhost:9092"
 | |
|         schema_registry_url = "http://localhost:8081"
 | |
|         return DatahubKafkaEmitter(
 | |
|             config=KafkaEmitterConfig(
 | |
|                 connection=KafkaProducerConnectionConfig(
 | |
|                     bootstrap=kafka_server, schema_registry_url=schema_registry_url
 | |
|                 )
 | |
|             )
 | |
|         )
 | |
| 
 | |
| 
 | |
| dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
 | |
| 
 | |
| property_map_to_set = {
 | |
|     "cluster_name": "datahubproject.acryl.io",
 | |
|     "retention_time": "2 years",
 | |
| }
 | |
| 
 | |
| with get_emitter() as emitter:
 | |
|     for patch_mcp in (
 | |
|         DatasetPatchBuilder(dataset_urn)
 | |
|         .set_custom_properties(property_map_to_set)
 | |
|         .build()
 | |
|     ):
 | |
|         emitter.emit(patch_mcp)
 | |
| 
 | |
| 
 | |
| log.info(
 | |
|     f"Replaced custom properties on dataset {dataset_urn} as {property_map_to_set}"
 | |
| )
 |