mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-26 06:28:33 +00:00
fix: ignore non-serializable params when hashing pipeline objects (#3842)
* ignore non-serializable params when hashing pipeline objects * make tests more clear
This commit is contained in:
parent
ccda51fb43
commit
fa4404baa0
@ -2288,7 +2288,8 @@ class Pipeline:
|
||||
return datetime.datetime.now(datetime.timezone.utc) - self.init_time
|
||||
|
||||
def send_pipeline_event(self, is_indexing: bool = False):
|
||||
fingerprint = sha1(json.dumps(self.get_config(), sort_keys=True).encode()).hexdigest()
|
||||
json_repr = json.dumps(self.get_config(), sort_keys=True, default=lambda o: "<not serializable>")
|
||||
fingerprint = sha1(json_repr.encode()).hexdigest()
|
||||
send_custom_event(
|
||||
"pipeline",
|
||||
payload={
|
||||
|
||||
@ -1,19 +1,17 @@
|
||||
from copy import deepcopy
|
||||
from pathlib import Path
|
||||
import os
|
||||
import ssl
|
||||
import json
|
||||
import platform
|
||||
import sys
|
||||
import datetime
|
||||
from typing import Tuple
|
||||
from copy import deepcopy
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from requests import PreparedRequest
|
||||
import responses
|
||||
import logging
|
||||
from transformers import pipeline
|
||||
import yaml
|
||||
import pandas as pd
|
||||
|
||||
from haystack import __version__
|
||||
from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore
|
||||
@ -36,16 +34,13 @@ from haystack.pipelines import (
|
||||
DocumentSearchPipeline,
|
||||
QuestionGenerationPipeline,
|
||||
MostSimilarDocumentsPipeline,
|
||||
BaseStandardPipeline,
|
||||
)
|
||||
from haystack.pipelines.config import validate_config_strings, get_component_definitions
|
||||
from haystack.pipelines.utils import generate_code
|
||||
from haystack.errors import PipelineConfigError
|
||||
from haystack.nodes import PreProcessor, TextConverter, QuestionGenerator
|
||||
from haystack.nodes import PreProcessor, TextConverter
|
||||
from haystack.utils.deepsetcloud import DeepsetCloudError
|
||||
from haystack import Document, Answer
|
||||
from haystack.nodes.other.route_documents import RouteDocuments
|
||||
from haystack.nodes.other.join_answers import JoinAnswers
|
||||
from haystack import Answer
|
||||
|
||||
from ..conftest import (
|
||||
MOCK_DC,
|
||||
@ -2158,3 +2153,44 @@ def test_fix_to_pipeline_execution_when_join_follows_join():
|
||||
res = pipeline.run(query="Alpha Beta Gamma Delta")
|
||||
documents = res["documents"]
|
||||
assert len(documents) == 4 # all four documents should be found
|
||||
|
||||
|
||||
def test_send_pipeline_event():
|
||||
"""
|
||||
Test the event can be sent and the internal fields are correctly set
|
||||
"""
|
||||
pipeline = Pipeline()
|
||||
pipeline.add_node(MockNode(), name="mock_node", inputs=["Query"])
|
||||
|
||||
with mock.patch("haystack.pipelines.base.send_custom_event") as mocked_send:
|
||||
today_at_midnight = datetime.datetime.combine(datetime.datetime.now(), datetime.time.min, datetime.timezone.utc)
|
||||
pipeline.send_pipeline_event()
|
||||
mocked_send.assert_called_once()
|
||||
assert pipeline.time_of_last_sent_event == today_at_midnight
|
||||
assert pipeline.last_window_run_total == 0
|
||||
|
||||
|
||||
def test_send_pipeline_event_unserializable_param():
|
||||
"""
|
||||
Test the event can be sent even when a certain component was initialized with a
|
||||
non-serializable parameter, see https://github.com/deepset-ai/haystack/issues/3833
|
||||
"""
|
||||
|
||||
class CustomNode(MockNode):
|
||||
"""A mock node that can be inited passing a param"""
|
||||
|
||||
def __init__(self, param):
|
||||
self.param = param
|
||||
|
||||
# create a custom node passing a parameter that can't be serialized (an empty set)
|
||||
custom_node = CustomNode(param=set())
|
||||
|
||||
pipeline = Pipeline()
|
||||
pipeline.add_node(custom_node, name="custom_node", inputs=["Query"])
|
||||
|
||||
with mock.patch("haystack.pipelines.base.send_custom_event") as mocked_send:
|
||||
today_at_midnight = datetime.datetime.combine(datetime.datetime.now(), datetime.time.min, datetime.timezone.utc)
|
||||
pipeline.send_pipeline_event()
|
||||
mocked_send.assert_called_once()
|
||||
assert pipeline.time_of_last_sent_event == today_at_midnight
|
||||
assert pipeline.last_window_run_total == 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user