mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 10:49:00 +00:00 
			
		
		
		
	fix(ingest): unify emit interface (#6592)
This commit is contained in:
		
							parent
							
								
									6fe9ad4fbb
								
							
						
					
					
						commit
						d6dd8ccc51
					
				| @ -1,7 +1,7 @@ | ||||
| from __future__ import annotations | ||||
| 
 | ||||
| from dataclasses import dataclass, field | ||||
| from typing import Callable, Iterable, Optional, Union, cast | ||||
| from typing import Callable, Iterable, Optional, Union | ||||
| 
 | ||||
| import datahub.emitter.mce_builder as builder | ||||
| from datahub.emitter.kafka_emitter import DatahubKafkaEmitter | ||||
| @ -81,10 +81,4 @@ class CorpGroup: | ||||
|         :param callback: The callback method for KafkaEmitter if it is used | ||||
|         """ | ||||
|         for mcp in self.generate_mcp(): | ||||
|             if type(emitter).__name__ == "DatahubKafkaEmitter": | ||||
|                 assert callback is not None | ||||
|                 kafka_emitter = cast("DatahubKafkaEmitter", emitter) | ||||
|                 kafka_emitter.emit(mcp, callback) | ||||
|             else: | ||||
|                 rest_emitter = cast("DatahubRestEmitter", emitter) | ||||
|                 rest_emitter.emit(mcp) | ||||
|             emitter.emit(mcp, callback) | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| from __future__ import annotations | ||||
| 
 | ||||
| from dataclasses import dataclass, field | ||||
| from typing import Callable, Iterable, List, Optional, Union, cast | ||||
| from typing import Callable, Iterable, List, Optional, Union | ||||
| 
 | ||||
| import datahub.emitter.mce_builder as builder | ||||
| from datahub.emitter.kafka_emitter import DatahubKafkaEmitter | ||||
| @ -100,10 +100,4 @@ class CorpUser: | ||||
|         :param callback: The callback method for KafkaEmitter if it is used | ||||
|         """ | ||||
|         for mcp in self.generate_mcp(): | ||||
|             if type(emitter).__name__ == "DatahubKafkaEmitter": | ||||
|                 assert callback is not None | ||||
|                 kafka_emitter = cast("DatahubKafkaEmitter", emitter) | ||||
|                 kafka_emitter.emit(mcp, callback) | ||||
|             else: | ||||
|                 rest_emitter = cast("DatahubRestEmitter", emitter) | ||||
|                 rest_emitter.emit(mcp) | ||||
|             emitter.emit(mcp, callback) | ||||
|  | ||||
| @ -135,10 +135,6 @@ class DataFlow: | ||||
|         :param emitter: Datahub Emitter to emit the process event | ||||
|         :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used | ||||
|         """ | ||||
|         from datahub.emitter.kafka_emitter import DatahubKafkaEmitter | ||||
| 
 | ||||
|         for mcp in self.generate_mcp(): | ||||
|             if isinstance(emitter, DatahubKafkaEmitter): | ||||
|                 emitter.emit(mcp, callback) | ||||
|             else: | ||||
|                 emitter.emit(mcp) | ||||
|             emitter.emit(mcp, callback) | ||||
|  | ||||
| @ -178,13 +178,9 @@ class DataJob: | ||||
|         :param emitter: Datahub Emitter to emit the process event | ||||
|         :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used | ||||
|         """ | ||||
|         from datahub.emitter.kafka_emitter import DatahubKafkaEmitter | ||||
| 
 | ||||
|         for mcp in self.generate_mcp(): | ||||
|             if isinstance(emitter, DatahubKafkaEmitter): | ||||
|                 emitter.emit(mcp, callback) | ||||
|             else: | ||||
|                 emitter.emit(mcp) | ||||
|             emitter.emit(mcp, callback) | ||||
| 
 | ||||
|     def generate_data_input_output_mcp(self) -> Iterable[MetadataChangeProposalWrapper]: | ||||
|         mcp = MetadataChangeProposalWrapper( | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| import time | ||||
| from dataclasses import dataclass, field | ||||
| from enum import Enum | ||||
| from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Union, cast | ||||
| from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Optional, Union | ||||
| 
 | ||||
| from datahub.api.entities.datajob import DataFlow, DataJob | ||||
| from datahub.emitter.mcp import MetadataChangeProposalWrapper | ||||
| @ -282,13 +282,7 @@ class DataProcessInstance: | ||||
|         :param emitter: (Union[DatahubRestEmitter, DatahubKafkaEmitter]) the datahub emitter to emit generated mcps | ||||
|         :param callback: (Optional[Callable[[Exception, str], None]]) the callback method for KafkaEmitter if it is used | ||||
|         """ | ||||
|         if type(emitter).__name__ == "DatahubKafkaEmitter": | ||||
|             assert callback is not None | ||||
|             kafka_emitter = cast("DatahubKafkaEmitter", emitter) | ||||
|             kafka_emitter.emit(mcp, callback) | ||||
|         else: | ||||
|             rest_emitter = cast("DatahubRestEmitter", emitter) | ||||
|             rest_emitter.emit(mcp) | ||||
|         emitter.emit(mcp, callback) | ||||
| 
 | ||||
|     def emit( | ||||
|         self, | ||||
|  | ||||
| @ -116,9 +116,9 @@ class DatahubKafkaEmitter: | ||||
|         callback: Optional[Callable[[Exception, str], None]] = None, | ||||
|     ) -> None: | ||||
|         if isinstance(item, (MetadataChangeProposal, MetadataChangeProposalWrapper)): | ||||
|             return self.emit_mcp_async(item, callback or _noop_callback) | ||||
|             return self.emit_mcp_async(item, callback or _error_reporting_callback) | ||||
|         else: | ||||
|             return self.emit_mce_async(item, callback or _noop_callback) | ||||
|             return self.emit_mce_async(item, callback or _error_reporting_callback) | ||||
| 
 | ||||
|     def emit_mce_async( | ||||
|         self, | ||||
| @ -155,5 +155,6 @@ class DatahubKafkaEmitter: | ||||
|             producer.flush() | ||||
| 
 | ||||
| 
 | ||||
| def _noop_callback(err: Exception, msg: str) -> None: | ||||
|     pass | ||||
| def _error_reporting_callback(err: Exception, msg: str) -> None: | ||||
|     if err: | ||||
|         logger.error(f"Failed to emit to kafka: {err} {msg}") | ||||
|  | ||||
| @ -4,7 +4,7 @@ import json | ||||
| import logging | ||||
| import os | ||||
| from json.decoder import JSONDecodeError | ||||
| from typing import Any, Dict, List, Optional, Tuple, Union | ||||
| from typing import Any, Callable, Dict, List, Optional, Tuple, Union | ||||
| 
 | ||||
| import requests | ||||
| from requests.adapters import HTTPAdapter, Retry | ||||
| @ -175,15 +175,29 @@ class DataHubRestEmitter(Closeable): | ||||
|             MetadataChangeProposalWrapper, | ||||
|             UsageAggregation, | ||||
|         ], | ||||
|         # NOTE: This signature should have the exception be optional rather than | ||||
|         #      required. However, this would be a breaking change that may need | ||||
|         #      more careful consideration. | ||||
|         callback: Optional[Callable[[Exception, str], None]] = None, | ||||
|     ) -> Tuple[datetime.datetime, datetime.datetime]: | ||||
|         start_time = datetime.datetime.now() | ||||
|         if isinstance(item, UsageAggregation): | ||||
|             self.emit_usage(item) | ||||
|         elif isinstance(item, (MetadataChangeProposal, MetadataChangeProposalWrapper)): | ||||
|             self.emit_mcp(item) | ||||
|         try: | ||||
|             if isinstance(item, UsageAggregation): | ||||
|                 self.emit_usage(item) | ||||
|             elif isinstance( | ||||
|                 item, (MetadataChangeProposal, MetadataChangeProposalWrapper) | ||||
|             ): | ||||
|                 self.emit_mcp(item) | ||||
|             else: | ||||
|                 self.emit_mce(item) | ||||
|         except Exception as e: | ||||
|             if callback: | ||||
|                 callback(e, str(e)) | ||||
|             raise | ||||
|         else: | ||||
|             self.emit_mce(item) | ||||
|         return start_time, datetime.datetime.now() | ||||
|             if callback: | ||||
|                 callback(None, "success")  # type: ignore | ||||
|             return start_time, datetime.datetime.now() | ||||
| 
 | ||||
|     def emit_mce(self, mce: MetadataChangeEvent) -> None: | ||||
|         url = f"{self._gms_server}/entities?action=ingest" | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Harshal Sheth
						Harshal Sheth