feat: Improve ChatMessage _deserialize_content ValueError - make it more LLM friendly (#9484)

* Improve ChatMessage _deserialize_content ValueError - make it more LLM friendly
* Add unit test
* Add reno note
* Add descriptive ValueError for missing role
* Update haystack/dataclasses/chat_message.py
  Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
* Update releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml
  Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
* Add role check in ChatMessage
* fixes + refinements

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
2025-12-12 15:27:06 +00:00 · 2025-06-04 17:14:05 +02:00 · 2025-06-04 17:14:05 +02:00 · 853a32f8da
commit 853a32f8da
parent db359cff40
3 changed files with 34 additions and 5 deletions
--- a/haystack/dataclasses/chat_message.py
+++ b/haystack/dataclasses/chat_message.py
@ -113,7 +113,14 @@ def _deserialize_content(serialized_content: List[Dict[str, Any]]) -> List[ChatM
            tcr = ToolCallResult(result=result, origin=origin, error=error)
            content.append(tcr)
        else:
-            raise ValueError(f"Unsupported part in serialized ChatMessage: `{part}`")
+            raise ValueError(
+                f"Unsupported content part in the serialized ChatMessage: {part}. "
+                "The `content` field of the serialized ChatMessage must be a list of dictionaries, where each "
+                "dictionary contains one of these keys: 'text', 'tool_call', or 'tool_call_result'. "
+                f"Valid formats: [{{'text': 'Hello'}}, "
+                f"{{'tool_call': {{'tool_name': 'search', 'arguments': {{}}, 'id': 'call_123'}}}}, "
+                f"{{'tool_call_result': {{'result': 'data', 'origin': {{...}}, 'error': false}}}}]"
+            )

    return content

@ -360,6 +367,14 @@ class ChatMessage:
        :returns:
            The created object.
        """
+        if not "role" in data and not "_role" in data:
+            raise ValueError(
+                "The `role` field is required in the message dictionary. "
+                f"Expected a dictionary with 'role' field containing one of: {[role.value for role in ChatRole]}. "
+                f"Common roles are 'user' (for user messages) and 'assistant' (for AI responses). "
+                f"Received dictionary with keys: {list(data.keys())}"
+            )
+
        if "content" in data:
            init_params: Dict[str, Any] = {
                "_role": ChatRole(data["role"]),
--- a/releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml
+++ b/releasenotes/notes/improve-chatmessage-error-messages-llm-agents-a1b2c3d4e5f6g7h8.yaml
@ -0,0 +1,7 @@
+enhancements:
+  - |
+    Improved error messages in ChatMessage deserialization to provide clearer guidance for LLM-agent use cases.
+    The `_deserialize_content` function now provides detailed error messages when ChatMessage content format
+    is invalid, including the expected structure (list of dictionaries with 'text', 'tool_call', or
+    'tool_call_result' keys) and concrete examples. This enhancement reduces debugging cycles and improves
+    LLM self-correction capabilities when working with agent tools and structured message formats.
--- a/test/dataclasses/test_chat_message.py
+++ b/test/dataclasses/test_chat_message.py
@ -201,12 +201,19 @@ def test_to_dict_with_invalid_content_type():


 def test_from_dict_with_invalid_content_type():
-    data = {"_role": "assistant", "_content": [{"text": "Hello"}, "invalid"]}
-    with pytest.raises(ValueError):
+    data = {"role": "assistant", "content": [{"text": "Hello"}, "invalid"]}
+    with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"):
        ChatMessage.from_dict(data)

-    data = {"_role": "assistant", "_content": [{"text": "Hello"}, {"invalid": "invalid"}]}
-    with pytest.raises(ValueError):
+    data = {"role": "assistant", "content": [{"text": "Hello"}, {"invalid": "invalid"}]}
+    with pytest.raises(ValueError, match="Unsupported content part in the serialized ChatMessage"):
+        ChatMessage.from_dict(data)
+
+
+def test_from_dict_with_missing_role():
+    data = {"content": [{"text": "Hello"}], "meta": {}}
+
+    with pytest.raises(ValueError, match=r"The `role` field is required"):
        ChatMessage.from_dict(data)