"""Example: emit a DataFlow and a DataJob with dataset lineage to DataHub.

Creates an Airflow-style pipeline (DataFlow) and a task within it (DataJob)
whose inlets/outlets record dataset-level lineage, then upserts both
entities via the DataHub Python SDK.
"""

from datahub.metadata.urns import DatasetUrn, TagUrn
from datahub.sdk import DataFlow, DataHubClient, DataJob

# Reads connection settings (server URL, token) from environment variables.
client = DataHubClient.from_env()

# datajob will inherit the platform and platform instance from the flow

dataflow = DataFlow(
    platform="airflow",
    name="example_dag",
    platform_instance="PROD",
    description="example dataflow",
    tags=[TagUrn(name="tag1"), TagUrn(name="tag2")],
)

datajob = DataJob(
    name="example_datajob",
    flow=dataflow,
    # Datasets consumed by this job (upstream lineage).
    inlets=[
        DatasetUrn(platform="hdfs", name="dataset1", env="PROD"),
    ],
    # Datasets produced by this job (downstream lineage).
    outlets=[
        DatasetUrn(platform="hdfs", name="dataset2", env="PROD"),
    ],
)

# Upsert the flow before the job so the job's parent flow exists.
client.entities.upsert(dataflow)
client.entities.upsert(datajob)