fix: Update telemetry to not serialize Pipeline if disabled. (#4000)

* Update telemetry to not serialize Pipeline if disabled.

* Also disabled telemetry sending event in run_async in the RayPipeline since RayPipeline cannot be serialized currently.
This commit is contained in:
Sebastian 2023-01-30 16:58:43 +01:00 committed by GitHub
parent 1a8fe0031d
commit 249398d806
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 10 deletions

View File

@ -51,7 +51,7 @@ from haystack.nodes import BaseGenerator, Docs2Answers, BaseReader, BaseSummariz
from haystack.nodes.base import BaseComponent, RootNode
from haystack.nodes.retriever.base import BaseRetriever
from haystack.document_stores.base import BaseDocumentStore
from haystack.telemetry import send_event, send_custom_event
from haystack.telemetry import send_event, send_custom_event, is_telemetry_enabled
from haystack.utils.experiment_tracking import MLflowTrackingHead, Tracker as tracker
@ -2309,18 +2309,21 @@ class Pipeline:
self.last_window_run_total = self.run_total
def send_pipeline_event_if_needed(self, is_indexing: bool = False):
should_send_event = self.has_event_time_interval_exceeded() or self.has_event_run_total_threshold_exceeded()
if should_send_event and not self.sent_event_in_window:
self.send_pipeline_event(is_indexing)
self.sent_event_in_window = True
elif self.has_event_time_interval_exceeded():
self.sent_event_in_window = False
if is_telemetry_enabled():
should_send_event = (
self._has_event_time_interval_exceeded() or self._has_event_run_total_threshold_exceeded()
)
if should_send_event and not self.sent_event_in_window:
self.send_pipeline_event(is_indexing)
self.sent_event_in_window = True
elif self._has_event_time_interval_exceeded():
self.sent_event_in_window = False
def has_event_time_interval_exceeded(self):
def _has_event_time_interval_exceeded(self):
now = datetime.datetime.now(datetime.timezone.utc)
return now - self.time_of_last_sent_event > self.event_time_interval
def has_event_run_total_threshold_exceeded(self):
def _has_event_run_total_threshold_exceeded(self):
return self.run_total - self.last_window_run_total > self.event_run_total_threshold

View File

@ -401,7 +401,8 @@ class RayPipeline(Pipeline):
i += 1 # attempt executing next node in the queue as current `node_id` has unprocessed predecessors
self.run_total += 1
self.send_pipeline_event_if_needed(is_indexing=file_paths is not None)
# Disabled due to issue https://github.com/deepset-ai/haystack/issues/3970
# self.send_pipeline_event_if_needed(is_indexing=file_paths is not None)
return node_output