From 179e9cea08f00f25c785e371c4ca1b092837bb4c Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Fri, 12 May 2023 18:51:26 +0200 Subject: [PATCH] feat: Send pipeline config hash every 100 runs (#4884) * Add since_last_run property * Revert "Add since_last_run property" This reverts commit c1c907ef58a696a97d964fb9c45fbee0c80365aa. * Send pipeline config hash for each run * Send event every 100 runs * Merge branch 'main' into telemetry_since_last_run * PR review * Move constant --- haystack/pipelines/base.py | 5 +++++ haystack/telemetry.py | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index e3016da45..dca9a55d5 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -73,6 +73,7 @@ class Pipeline: self.graph = DiGraph() self.config_hash = None self.last_config_hash = None + self.runs = 0 @property def root_node(self) -> Optional[str]: @@ -492,6 +493,8 @@ class Pipeline: about their execution. By default, this information includes the input parameters the Nodes received and the output they generated. You can then find all debug information in the dictionary returned by this method under the key `_debug`. """ + self.runs += 1 + send_pipeline_event( pipeline=self, query=query, @@ -640,6 +643,8 @@ class Pipeline: about their execution. By default, this information includes the input parameters the Nodes received and the output they generated. You can then find all debug information in the dictionary returned by this method under the key `_debug`. """ + self.runs += 1 + send_pipeline_event( pipeline=self, queries=queries, diff --git a/haystack/telemetry.py b/haystack/telemetry.py index e6e24f9e1..db4c0f716 100644 --- a/haystack/telemetry.py +++ b/haystack/telemetry.py @@ -14,6 +14,7 @@ HAYSTACK_TELEMETRY_ENABLED = "HAYSTACK_TELEMETRY_ENABLED" HAYSTACK_EXECUTION_CONTEXT = "HAYSTACK_EXECUTION_CONTEXT" HAYSTACK_DOCKER_CONTAINER = "HAYSTACK_DOCKER_CONTAINER" CONFIG_PATH = Path("~/.haystack/config.yaml").expanduser() +SEND_EVENT_EVERY_N_RUNS = 100 logger = logging.getLogger(__name__) @@ -146,14 +147,18 @@ def send_pipeline_event( # type: ignore telemetry.send_event(event_name="Public Demo", event_properties=event_properties) return - # Send this event only if the pipeline config has changed - if pipeline.last_config_hash == pipeline.config_hash: + # If pipeline config has not changed, send an event every SEND_EVENT_EVERY_N_RUNS runs + if pipeline.last_config_hash == pipeline.config_hash and pipeline.runs % SEND_EVENT_EVERY_N_RUNS == 0: + event_properties = {"pipeline.config_hash": pipeline.config_hash, "pipeline.runs": pipeline.runs} + telemetry.send_event(event_name="Pipeline", event_properties=event_properties) return pipeline.last_config_hash = pipeline.config_hash + pipeline.runs = 1 event_properties = { "pipeline.classname": pipeline.__class__.__name__, "pipeline.config_hash": pipeline.config_hash, + "pipeline.runs": pipeline.runs, } # Add document store