From 17868cb06991dfded163ca5df49f23f7341ae690 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 9 Aug 2024 14:48:53 -0700 Subject: [PATCH] feat(ingest): various logging improvements (#11126) --- .../src/datahub/ingestion/sink/datahub_rest.py | 2 ++ .../src/datahub/telemetry/telemetry.py | 14 +++++++++++--- .../src/datahub/utilities/logging_manager.py | 5 +++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index a9f788acf6..6d7105bd26 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -202,6 +202,8 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]): record_urn = _get_urn(record_envelope) if record_urn: e.info["urn"] = record_urn + if workunit_id := record_envelope.metadata.get("workunit_id"): + e.info["workunit_id"] = workunit_id if not self.treat_errors_as_warnings: self.report.report_failure({"error": e.message, "info": e.info}) diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index 69a790b3d9..4faf04ee2d 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -283,7 +283,7 @@ class Telemetry: if not self.enabled or self.mp is None or self.tracking_init is True: return - logger.debug("Sending init Telemetry") + logger.debug("Sending init telemetry") try: self.mp.people_set( self.client_id, @@ -310,13 +310,21 @@ class Telemetry: if not self.enabled or self.mp is None: return + properties = properties or {} + # send event try: - logger.debug(f"Sending telemetry for {event_name}") + if event_name == "function-call": + logger.debug( + f"Sending telemetry for {event_name} {properties.get('function')}, status {properties.get('status')}" + ) + else: + logger.debug(f"Sending telemetry for {event_name}") + properties = { **_default_telemetry_properties(), **self._server_props(server), - **(properties or {}), + **properties, } self.mp.track(self.client_id, event_name, properties) except Exception as e: diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index 64383745eb..1eb7633940 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -35,6 +35,8 @@ DATAHUB_PACKAGES = [ "acryl_datahub_cloud", ] IN_MEMORY_LOG_BUFFER_SIZE = 2000 # lines +IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH = 2000 # characters + NO_COLOR = os.environ.get("NO_COLOR", False) @@ -159,6 +161,9 @@ class _LogBuffer: self._buffer: Deque[str] = collections.deque(maxlen=maxlen) def write(self, line: str) -> None: + if len(line) > IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH: + line = line[:IN_MEMORY_LOG_BUFFER_MAX_LINE_LENGTH] + "[truncated]" + self._buffer.append(line) def clear(self) -> None: