fix: ignore non-serializable params when hashing pipeline objects (#3842)

* ignore non-serializable params when hashing pipeline objects

* make tests more clear
This commit is contained in:
Massimiliano Pippi 2023-01-11 17:11:41 +01:00 committed by GitHub
parent ccda51fb43
commit fa4404baa0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 11 deletions

View File

@ -2288,7 +2288,8 @@ class Pipeline:
return datetime.datetime.now(datetime.timezone.utc) - self.init_time
def send_pipeline_event(self, is_indexing: bool = False):
fingerprint = sha1(json.dumps(self.get_config(), sort_keys=True).encode()).hexdigest()
json_repr = json.dumps(self.get_config(), sort_keys=True, default=lambda o: "<not serializable>")
fingerprint = sha1(json_repr.encode()).hexdigest()
send_custom_event(
"pipeline",
payload={

View File

@ -1,19 +1,17 @@
from copy import deepcopy
from pathlib import Path
import os
import ssl
import json
import platform
import sys
import datetime
from typing import Tuple
from copy import deepcopy
from unittest import mock
import pytest
from requests import PreparedRequest
import responses
import logging
from transformers import pipeline
import yaml
import pandas as pd
from haystack import __version__
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
@ -36,16 +34,13 @@ from haystack.pipelines import (
DocumentSearchPipeline,
QuestionGenerationPipeline,
MostSimilarDocumentsPipeline,
BaseStandardPipeline,
)
from haystack.pipelines.config import validate_config_strings, get_component_definitions
from haystack.pipelines.utils import generate_code
from haystack.errors import PipelineConfigError
from haystack.nodes import PreProcessor, TextConverter, QuestionGenerator
from haystack.nodes import PreProcessor, TextConverter
from haystack.utils.deepsetcloud import DeepsetCloudError
from haystack import Document, Answer
from haystack.nodes.other.route_documents import RouteDocuments
from haystack.nodes.other.join_answers import JoinAnswers
from haystack import Answer
from ..conftest import (
MOCK_DC,
@ -2158,3 +2153,44 @@ def test_fix_to_pipeline_execution_when_join_follows_join():
res = pipeline.run(query="Alpha Beta Gamma Delta")
documents = res["documents"]
assert len(documents) == 4 # all four documents should be found
def test_send_pipeline_event():
"""
Test the event can be sent and the internal fields are correctly set
"""
pipeline = Pipeline()
pipeline.add_node(MockNode(), name="mock_node", inputs=["Query"])
with mock.patch("haystack.pipelines.base.send_custom_event") as mocked_send:
today_at_midnight = datetime.datetime.combine(datetime.datetime.now(), datetime.time.min, datetime.timezone.utc)
pipeline.send_pipeline_event()
mocked_send.assert_called_once()
assert pipeline.time_of_last_sent_event == today_at_midnight
assert pipeline.last_window_run_total == 0
def test_send_pipeline_event_unserializable_param():
"""
Test the event can be sent even when a certain component was initialized with a
non-serializable parameter, see https://github.com/deepset-ai/haystack/issues/3833
"""
class CustomNode(MockNode):
"""A mock node that can be inited passing a param"""
def __init__(self, param):
self.param = param
# create a custom node passing a parameter that can't be serialized (an empty set)
custom_node = CustomNode(param=set())
pipeline = Pipeline()
pipeline.add_node(custom_node, name="custom_node", inputs=["Query"])
with mock.patch("haystack.pipelines.base.send_custom_event") as mocked_send:
today_at_midnight = datetime.datetime.combine(datetime.datetime.now(), datetime.time.min, datetime.timezone.utc)
pipeline.send_pipeline_event()
mocked_send.assert_called_once()
assert pipeline.time_of_last_sent_event == today_at_midnight
assert pipeline.last_window_run_total == 0