Merge branch 'main' into feature/chinese-document-splitter

This commit is contained in:
David S. Batista 2025-06-04 14:20:20 +01:00 committed by GitHub
commit eda0f79150
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 84 additions and 20 deletions

View File

@ -1,7 +1,7 @@
loaders:
- type: haystack_pydoc_tools.loaders.CustomPythonLoader
search_path: [../../../haystack/components/agents]
modules: ["agent"]
modules: ["agent", "state"]
ignore_when_discovered: ["__init__"]
processors:
- type: filter

View File

@ -512,13 +512,18 @@ class _Component:
# no decorators here
def run(self, value: int):
return {"output_1": 1, "output_2": "2"}
# also no decorators here
async def run_async(self, value: int):
return {"output_1": 1, "output_2": "2"}
```
"""
has_decorator = hasattr(instance.run, "_output_types_cache")
if has_decorator:
has_run_decorator = hasattr(instance.run, "_output_types_cache")
has_run_async_decorator = hasattr(instance, "run_async") and hasattr(instance.run_async, "_output_types_cache")
if has_run_decorator or has_run_async_decorator:
raise ComponentError(
"Cannot call `set_output_types` on a component that already has "
"the 'output_types' decorator on its `run` method"
"Cannot call `set_output_types` on a component that already has the 'output_types' decorator on its "
"`run` or `run_async` methods."
)
instance.__haystack_output__ = Sockets(

View File

@ -388,9 +388,19 @@ class ChatMessage:
raise ValueError(f"Missing 'content' or '_content' in serialized ChatMessage: `{data}`")
def to_openai_dict_format(self) -> Dict[str, Any]:
def to_openai_dict_format(self, require_tool_call_ids: bool = True) -> Dict[str, Any]:
"""
Convert a ChatMessage to the dictionary format expected by OpenAI's Chat API.
:param require_tool_call_ids:
If True (default), enforces that each Tool Call includes a non-null `id` attribute.
Set to False to allow Tool Calls without `id`, which may be suitable for shallow OpenAI-compatible APIs.
:returns:
The ChatMessage in the format expected by OpenAI's Chat API.
:raises ValueError:
If the message format is invalid, or if `require_tool_call_ids` is True and any Tool Call is missing an
`id` attribute.
"""
text_contents = self.texts
tool_calls = self.tool_calls
@ -411,10 +421,12 @@ class ChatMessage:
if tool_call_results:
result = tool_call_results[0]
if result.origin.id is None:
raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
openai_msg["content"] = result.result
if result.origin.id is not None:
openai_msg["tool_call_id"] = result.origin.id
elif require_tool_call_ids:
raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
# OpenAI does not provide a way to communicate errors in tool invocations, so we ignore the error field
return openai_msg
@ -422,17 +434,19 @@ class ChatMessage:
openai_msg["content"] = text_contents[0]
if tool_calls:
openai_tool_calls = []
for tc in tool_calls:
if tc.id is None:
raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
openai_tool_calls.append(
{
"id": tc.id,
openai_tool_call = {
"type": "function",
# We disable ensure_ascii so special chars like emojis are not converted
"function": {"name": tc.tool_name, "arguments": json.dumps(tc.arguments, ensure_ascii=False)},
}
)
if tc.id is not None:
openai_tool_call["id"] = tc.id
elif require_tool_call_ids:
raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
openai_tool_calls.append(openai_tool_call)
openai_msg["tool_calls"] = openai_tool_calls
return openai_msg

View File

@ -0,0 +1,4 @@
---
fixes:
- |
When calling `set_output_types`, we now also check that the `@component.output_types` decorator is not present on the `run_async` method of a Component. Previously, we only checked that the `Component.run` method did not have the decorator.

View File

@ -0,0 +1,7 @@
---
enhancements:
- |
Add a new parameter `require_tool_call_ids` to `ChatMessage.to_openai_dict_format`.
The default is `True`, for compatibility with OpenAI's Chat API: if the `id` field is missing from a Tool Call,
a `ValueError` is raised. Setting it to `False` is useful for shallow OpenAI-compatible APIs, where the `id`
field is not required; in that case the missing `id` / `tool_call_id` key is simply omitted from the output.

View File

@ -328,6 +328,23 @@ def test_output_types_decorator_and_set_output_types():
_ = MockComponent()
def test_output_types_decorator_and_set_output_types_async():
@component
class MockComponent:
def __init__(self) -> None:
component.set_output_types(self, value=int)
def run(self, value: int):
return {"value": 1}
@component.output_types(value=int)
async def run_async(self, value: int):
return {"value": 1}
with pytest.raises(ComponentError, match="Cannot call `set_output_types`"):
_ = MockComponent()
def test_output_types_decorator_mismatch_run_async_run():
@component
class MockComponent:

View File

@ -342,14 +342,31 @@ def test_to_openai_dict_format_invalid():
with pytest.raises(ValueError):
message.to_openai_dict_format()
def test_to_openai_dict_format_require_tool_call_ids():
tool_call_null_id = ToolCall(id=None, tool_name="weather", arguments={"city": "Paris"})
message = ChatMessage.from_assistant(tool_calls=[tool_call_null_id])
with pytest.raises(ValueError):
message.to_openai_dict_format()
message.to_openai_dict_format(require_tool_call_ids=True)
message = ChatMessage.from_tool(tool_result="result", origin=tool_call_null_id)
with pytest.raises(ValueError):
message.to_openai_dict_format()
message.to_openai_dict_format(require_tool_call_ids=True)
def test_to_openai_dict_format_require_tool_call_ids_false():
tool_call_null_id = ToolCall(id=None, tool_name="weather", arguments={"city": "Paris"})
message = ChatMessage.from_assistant(tool_calls=[tool_call_null_id])
openai_msg = message.to_openai_dict_format(require_tool_call_ids=False)
assert openai_msg == {
"role": "assistant",
"tool_calls": [{"type": "function", "function": {"name": "weather", "arguments": '{"city": "Paris"}'}}],
}
message = ChatMessage.from_tool(tool_result="result", origin=tool_call_null_id)
openai_msg = message.to_openai_dict_format(require_tool_call_ids=False)
assert openai_msg == {"role": "tool", "content": "result"}
def test_from_openai_dict_format_user_message():