Fix pipeline config and agent tools hashing for telemetry (#4508)

This commit is contained in:
Silvano Cerza 2023-03-28 09:41:50 +02:00 committed by GitHub
parent c777302fb4
commit cfb8dfd470
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 60 additions and 4 deletions

View File

@ -191,7 +191,8 @@ class Agent:
See haystack/telemetry.py::send_event
"""
try:
self.hash = md5(" ".join([tool.pipeline_or_node.__class__.__name__ for tool in self.tools.values()]))
tool_names = " ".join([tool.pipeline_or_node.__class__.__name__ for tool in self.tools.values()])
self.hash = md5(tool_names.encode()).hexdigest()
except Exception as exc:
logger.debug("Telemetry exception: %s", str(exc))
self.hash = "[an exception occurred during hashing]"

View File

@ -438,7 +438,7 @@ class Pipeline:
for comp in config_to_hash["components"]:
del comp["name"]
config_hash = json.dumps(config_to_hash, default=str)
self.config_hash = md5(config_hash)
self.config_hash = md5(config_hash.encode()).hexdigest()
except Exception as exc:
logger.debug("Telemetry exception: %s", str(exc))
self.config_hash = "[an exception occurred during hashing]"

View File

@ -3,6 +3,8 @@ import os
import re
from typing import Tuple
from test.conftest import MockRetriever, MockPromptNode
from unittest import mock
import pytest
from haystack import BaseComponent, Answer
@ -11,7 +13,6 @@ from haystack.agents.base import Tool
from haystack.errors import AgentError
from haystack.nodes import PromptModel, PromptNode, PromptTemplate
from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline, BaseStandardPipeline
from test.conftest import MockRetriever, MockPromptNode
@pytest.mark.unit
@ -335,3 +336,30 @@ def test_agent_run_batch(reader, retriever_with_docs, document_store_with_docs):
# TODO Replace Count tool once more tools are implemented so that we do not need to account for off-by-one errors
assert any(digit in results["answers"][0][0].answer for digit in ["5", "6", "five", "six"])
assert any(digit in results["answers"][1][0].answer for digit in ["5", "6", "five", "six"])
@pytest.mark.unit
def test_update_hash():
    """Check that ``Agent.hash`` is only recomputed by ``update_hash()``.

    Registering tools through ``add_tool`` must leave the stored hash
    untouched; the value is expected to change only after an explicit
    call to ``update_hash()``.
    """
    # MD5 hex digest of the empty string, i.e. the hash of an agent whose
    # tool list was empty when the hash was last computed.
    no_tools_hash = "d41d8cd98f00b204e9800998ecf8427e"

    agent = Agent(prompt_node=mock.Mock(), prompt_template=mock.Mock())
    assert agent.hash == no_tools_hash

    search_tool = Tool(
        name="Search",
        pipeline_or_node=mock.Mock(),
        description="useful for when you need to answer "
        "questions about where people live. You "
        "should ask targeted questions",
        output_variable="answers",
    )
    agent.add_tool(search_tool)
    # Adding a tool alone does not refresh the hash.
    assert agent.hash == no_tools_hash

    count_tool = Tool(
        name="Count",
        pipeline_or_node=mock.Mock(),
        description="useful for when you need to count how many characters are in a word. Ask only with a single word.",
    )
    agent.add_tool(count_tool)
    assert agent.hash == no_tools_hash

    # Only the explicit refresh takes the two registered tools into account.
    agent.update_hash()
    assert agent.hash == "5ac8eca2f92c9545adcce3682b80d4c5"

View File

@ -2,7 +2,6 @@ import ssl
import json
import platform
import sys
import datetime
from typing import Tuple
from copy import deepcopy
from unittest import mock
@ -2048,3 +2047,31 @@ def test_fix_to_pipeline_execution_when_join_follows_join():
res = pipeline.run(query="Alpha Beta Gamma Delta")
documents = res["documents"]
assert len(documents) == 4 # all four documents should be found
@pytest.mark.unit
def test_update_config_hash():
    """Check that ``Pipeline.update_config_hash()`` stores a hex digest string.

    ``Pipeline.get_config`` is patched to return a fixed configuration so the
    resulting hash is deterministic. A freshly created pipeline must have no
    hash (``None``) until ``update_config_hash()`` is called.
    """
    fake_configs = {
        "version": "ignore",
        "components": [
            {
                "name": "MyReader",
                "type": "FARMReader",
                "params": {"no_ans_boost": -10, "model_name_or_path": "deepset/roberta-base-squad2"},
            }
        ],
        "pipelines": [
            {
                "name": "my_query_pipeline",
                "nodes": [
                    {"name": "MyRetriever", "inputs": ["Query"]},
                    {"name": "MyReader", "inputs": ["MyRetriever"]},
                ],
            }
        ],
    }
    with mock.patch("haystack.pipelines.base.Pipeline.get_config", return_value=fake_configs):
        test_pipeline = Pipeline()
        # Identity check: ``None`` is a singleton, and ``==`` could be
        # satisfied by an object with a permissive ``__eq__``.
        assert test_pipeline.config_hash is None
        test_pipeline.update_config_hash()
        assert test_pipeline.config_hash == "a30d3273de0d70e63e8cd91d915255b3"