From 853a32f8da94b19aeb837d0409571c7908022ea2 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Wed, 4 Jun 2025 17:14:05 +0200 Subject: [PATCH] feat: Improve ChatMessage _deserialize_content ValueError - make it more LLM friendly (#9484) * Improve ChatMessage _deserialize_content ValueError - make it more LLM friendly * Add unit test * Add reno note * Add descriptive ValueError for missing role * Update haystack/dataclasses/chat_message.py Co-authored-by: Stefano Fiorucci * Update releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml Co-authored-by: Stefano Fiorucci * Add role check in ChatMessage * fixes + refinements --------- Co-authored-by: Stefano Fiorucci --- haystack/dataclasses/chat_message.py | 17 ++++++++++++++++- ...or-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml | 7 +++++++ test/dataclasses/test_chat_message.py | 15 +++++++++++---- 3 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml diff --git a/haystack/dataclasses/chat_message.py b/haystack/dataclasses/chat_message.py index d0271083d..535a7ff85 100644 --- a/haystack/dataclasses/chat_message.py +++ b/haystack/dataclasses/chat_message.py @@ -113,7 +113,14 @@ def _deserialize_content(serialized_content: List[Dict[str, Any]]) -> List[ChatM tcr = ToolCallResult(result=result, origin=origin, error=error) content.append(tcr) else: - raise ValueError(f"Unsupported part in serialized ChatMessage: `{part}`") + raise ValueError( + f"Unsupported content part in the serialized ChatMessage: {part}. " + "The `content` field of the serialized ChatMessage must be a list of dictionaries, where each " + "dictionary contains one of these keys: 'text', 'tool_call', or 'tool_call_result'. " + f"Valid formats: [{{'text': 'Hello'}}, " + f"{{'tool_call': {{'tool_name': 'search', 'arguments': {{}}, 'id': 'call_123'}}}}, " + f"{{'tool_call_result': {{'result': 'data', 'origin': {{...}}, 'error': false}}}}]" + ) return content @@ -360,6 +367,14 @@ class ChatMessage: :returns: The created object. """ + if not "role" in data and not "_role" in data: + raise ValueError( + "The `role` field is required in the message dictionary. " + f"Expected a dictionary with 'role' field containing one of: {[role.value for role in ChatRole]}. " + f"Common roles are 'user' (for user messages) and 'assistant' (for AI responses). " + f"Received dictionary with keys: {list(data.keys())}" + ) + if "content" in data: init_params: Dict[str, Any] = { "_role": ChatRole(data["role"]), diff --git a/releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml b/releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml new file mode 100644 index 000000000..a015ccc65 --- /dev/null +++ b/releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml @@ -0,0 +1,7 @@ +enhancements: + - | + Improved error messages in ChatMessage deserialization to provide clearer guidance for LLM-agent use cases. + The `_deserialize_content` function now provides detailed error messages when ChatMessage content format + is invalid, including the expected structure (list of dictionaries with 'text', 'tool_call', or + 'tool_call_result' keys) and concrete examples. This enhancement reduces debugging cycles and improves + LLM self-correction capabilities when working with agent tools and structured message formats. diff --git a/test/dataclasses/test_chat_message.py b/test/dataclasses/test_chat_message.py index eb441951a..4a2ac5263 100644 --- a/test/dataclasses/test_chat_message.py +++ b/test/dataclasses/test_chat_message.py @@ -201,12 +201,19 @@ def test_to_dict_with_invalid_content_type(): def test_from_dict_with_invalid_content_type(): - data = {"_role": "assistant", "_content": [{"text": "Hello"}, "invalid"]} - with pytest.raises(ValueError): + data = {"role": "assistant", "content": [{"text": "Hello"}, "invalid"]} + with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"): ChatMessage.from_dict(data) - data = {"_role": "assistant", "_content": [{"text": "Hello"}, {"invalid": "invalid"}]} - with pytest.raises(ValueError): + data = {"role": "assistant", "content": [{"text": "Hello"}, {"invalid": "invalid"}]} + with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"): + ChatMessage.from_dict(data) + + +def test_from_dict_with_missing_role(): + data = {"content": [{"text": "Hello"}], "meta": {}} + + with pytest.raises(ValueError, match=r"The `role` field is required"): ChatMessage.from_dict(data)