From 95dafdc20b0988dfcbfc06cfe92c1d076c82f2c4 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Thu, 28 Aug 2025 15:27:09 +0200 Subject: [PATCH] fix: reintroduce helpful error message in `ChatMessage` deserialization (#9748) * fix: reintroduce helpful error message in deserialization * fix fmt --- haystack/dataclasses/chat_message.py | 14 +++++++++++++- ...rialization-error-message-c0a02f3c9be08092.yaml | 8 ++++++++ test/dataclasses/test_chat_message.py | 4 ++-- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/chatmessage-deserialization-error-message-c0a02f3c9be08092.yaml diff --git a/haystack/dataclasses/chat_message.py b/haystack/dataclasses/chat_message.py index 4ffb29505..9d7a2af1e 100644 --- a/haystack/dataclasses/chat_message.py +++ b/haystack/dataclasses/chat_message.py @@ -209,7 +209,17 @@ def _deserialize_content_part(part: dict[str, Any]) -> ChatMessageContentT: if serialization_key in part: return cls.from_dict(part[serialization_key]) - raise ValueError(f"Unsupported content part in the serialized ChatMessage: `{part}`") + # NOTE: this verbose error message provides guidance to LLMs when creating invalid messages during agent runs + msg = ( + f"Unsupported content part in the serialized ChatMessage: {part}. " + "The `content` field of the serialized ChatMessage must be a list of dictionaries, where each " + "dictionary contains one of these keys: 'text', 'image', 'reasoning', 'tool_call', or 'tool_call_result'. " + "Valid formats: [{'text': 'Hello'}, {'image': {'base64_image': '...', ...}}, " + "{'reasoning': {'reasoning_text': 'I think...', 'extra': {...}}}, " + "{'tool_call': {'tool_name': 'search', 'arguments': {}, 'id': 'call_123'}}, " + "{'tool_call_result': {'result': 'data', 'origin': {...}, 'error': false}}]" + ) + raise ValueError(msg) def _serialize_content_part(part: ChatMessageContentT) -> dict[str, Any]: @@ -530,6 +540,8 @@ class ChatMessage: # pylint: disable=too-many-public-methods # it's OK since we :returns: The created object. """ + + # NOTE: this verbose error message provides guidance to LLMs when creating invalid messages during agent runs if not "role" in data and not "_role" in data: raise ValueError( "The `role` field is required in the message dictionary. " diff --git a/releasenotes/notes/chatmessage-deserialization-error-message-c0a02f3c9be08092.yaml b/releasenotes/notes/chatmessage-deserialization-error-message-c0a02f3c9be08092.yaml new file mode 100644 index 000000000..ecd626caa --- /dev/null +++ b/releasenotes/notes/chatmessage-deserialization-error-message-c0a02f3c9be08092.yaml @@ -0,0 +1,8 @@ +--- + +fixes: + - | + Reintroduce verbose error message when deserializing a `ChatMessage` with invalid content parts. + While LLMs may still generate messages in the wrong format, this error provides guidance on the expected structure, + making retries easier and more reliable during agent runs. + The error message was unintentionally removed during a previous refactoring. diff --git a/test/dataclasses/test_chat_message.py b/test/dataclasses/test_chat_message.py index 98bfd78e3..37e88fc42 100644 --- a/test/dataclasses/test_chat_message.py +++ b/test/dataclasses/test_chat_message.py @@ -398,11 +398,11 @@ class TestChatMessage: def test_from_dict_with_invalid_content_type(self): data = {"role": "assistant", "content": [{"text": "Hello"}, "invalid"]} - with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"): + with pytest.raises(ValueError, match=r"Unsupported content part.*Valid formats.*"): ChatMessage.from_dict(data) data = {"role": "assistant", "content": [{"text": "Hello"}, {"invalid": "invalid"}]} - with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"): + with pytest.raises(ValueError, match=r"Unsupported content part.*Valid formats.*"): ChatMessage.from_dict(data) def test_from_dict_with_missing_role(self):