refactor: Refactor hf api chat generator (#9449)

* Refactor HFAPI Chat Generator

* Add component info to generators

* Fix type hint

* Add reno

* Fix unit tests

* Remove incorrect dev comment

* Move _convert_streaming_chunks_to_chat_message to utils file
Sebastian Husch Lee 2025-05-27 15:55:06 +02:00 committed by GitHub
parent 3deaa20cb6
commit 81c0cefa41
11 changed files with 504 additions and 464 deletions
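In practice, the ComponentInfo attached to streaming chunks in this commit lets a streaming callback tell which generator a chunk came from. A minimal sketch of such a callback (the function name and printed format are illustrative, not part of this commit):

from haystack.dataclasses import StreamingChunk


def print_chunk_with_origin(chunk: StreamingChunk) -> None:
    # component_info (attached by the generators in this commit) carries the emitting component's name and type.
    origin = chunk.component_info.type if chunk.component_info else "unknown"
    print(f"[{origin}] {chunk.content}", end="", flush=True)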

View File

@@ -7,6 +7,7 @@ from datetime import datetime
 from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union
 from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
 from haystack.dataclasses import ChatMessage, ComponentInfo, StreamingChunk, ToolCall, select_streaming_callback
 from haystack.dataclasses.streaming_chunk import StreamingCallbackT
 from haystack.lazy_imports import LazyImport
@@ -101,6 +102,35 @@ def _convert_tools_to_hfapi_tools(
     return hf_tools
 
 
+def _convert_chat_completion_stream_output_to_streaming_chunk(
+    chunk: "ChatCompletionStreamOutput", component_info: Optional[ComponentInfo] = None
+) -> StreamingChunk:
+    """
+    Converts the Hugging Face API ChatCompletionStreamOutput to a StreamingChunk.
+    """
+    # Choices is empty if include_usage is set to True where the usage information is returned.
+    if len(chunk.choices) == 0:
+        usage = None
+        if chunk.usage:
+            usage = {"prompt_tokens": chunk.usage.prompt_tokens, "completion_tokens": chunk.usage.completion_tokens}
+        return StreamingChunk(
+            content="",
+            meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "usage": usage},
+            component_info=component_info,
+        )
+
+    # n is unused, so the API always returns only one choice
+    # the argument is probably allowed for compatibility with OpenAI
+    # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
+    choice = chunk.choices[0]
+
+    stream_chunk = StreamingChunk(
+        content=choice.delta.content or "",
+        meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "finish_reason": choice.finish_reason},
+        component_info=component_info,
+    )
+    return stream_chunk
+
+
 @component
 class HuggingFaceAPIChatGenerator:
     """
@@ -403,55 +433,19 @@
             **generation_kwargs,
         )
 
-        generated_text = ""
-        first_chunk_time = None
-        finish_reason = None
-        usage = None
-        meta: Dict[str, Any] = {}
-
         # get the component name and type
         component_info = ComponentInfo.from_component(self)
 
-        # Set up streaming handler
+        streaming_chunks = []
         for chunk in api_output:
-            # The chunk with usage returns an empty array for choices
-            if len(chunk.choices) > 0:
-                # n is unused, so the API always returns only one choice
-                # the argument is probably allowed for compatibility with OpenAI
-                # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
-                choice = chunk.choices[0]
-
-                text = choice.delta.content or ""
-                generated_text += text
-
-                if choice.finish_reason:
-                    finish_reason = choice.finish_reason
-
-                stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
-                streaming_callback(stream_chunk)
-
-            if chunk.usage:
-                usage = chunk.usage
-
-            if first_chunk_time is None:
-                first_chunk_time = datetime.now().isoformat()
-
-        if usage:
-            usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
-        else:
-            usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}
-
-        meta.update(
-            {
-                "model": self._client.model,
-                "index": 0,
-                "finish_reason": finish_reason,
-                "usage": usage_dict,
-                "completion_start_time": first_chunk_time,
-            }
-        )
-
-        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
+            streaming_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
+                chunk=chunk, component_info=component_info
+            )
+            streaming_chunks.append(streaming_chunk)
+            streaming_callback(streaming_chunk)
+
+        message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
+        if message.meta.get("usage") is None:
+            message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
 
         return {"replies": [message]}
 
     def _run_non_streaming(
@@ -503,51 +497,19 @@
             **generation_kwargs,
         )
 
-        generated_text = ""
-        first_chunk_time = None
-        finish_reason = None
-        usage = None
-        meta: Dict[str, Any] = {}
-
         # get the component name and type
        component_info = ComponentInfo.from_component(self)
 
+        streaming_chunks = []
         async for chunk in api_output:
-            # The chunk with usage returns an empty array for choices
-            if len(chunk.choices) > 0:
-                # n is unused, so the API always returns only one choice
-                # the argument is probably allowed for compatibility with OpenAI
-                # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
-                choice = chunk.choices[0]
-
-                text = choice.delta.content or ""
-                generated_text += text
-
-                stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
-                await streaming_callback(stream_chunk)  # type: ignore
-
-            if chunk.usage:
-                usage = chunk.usage
-
-            if first_chunk_time is None:
-                first_chunk_time = datetime.now().isoformat()
-
-        if usage:
-            usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
-        else:
-            usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}
-
-        meta.update(
-            {
-                "model": self._async_client.model,
-                "index": 0,
-                "finish_reason": finish_reason,
-                "usage": usage_dict,
-                "completion_start_time": first_chunk_time,
-            }
-        )
-
-        message = ChatMessage.from_assistant(text=generated_text, meta=meta)
+            stream_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
+                chunk=chunk, component_info=component_info
+            )
+            streaming_chunks.append(stream_chunk)
+            await streaming_callback(stream_chunk)  # type: ignore
+
+        message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
+        if message.meta.get("usage") is None:
+            message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
 
         return {"replies": [message]}
 
     async def _run_non_streaming_async(

View File

@@ -389,7 +389,10 @@ class HuggingFaceLocalChatGenerator:
         component_info = ComponentInfo.from_component(self)
         # streamer parameter hooks into HF streaming, HFTokenStreamingHandler is an adapter to our streaming
         generation_kwargs["streamer"] = HFTokenStreamingHandler(
-            tokenizer, streaming_callback, stop_words, component_info
+            tokenizer=tokenizer,
+            stream_handler=streaming_callback,
+            stop_words=stop_words,
+            component_info=component_info,
         )
 
         # convert messages to HF format

View File

@@ -13,6 +13,7 @@ from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
 
 from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
 from haystack.dataclasses import (
     AsyncStreamingCallbackT,
     ChatMessage,
@@ -455,69 +456,6 @@ def _check_finish_reason(meta: Dict[str, Any]) -> None:
         )
 
 
-def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
-    """
-    Connects the streaming chunks into a single ChatMessage.
-
-    :param chunks: The list of all `StreamingChunk` objects.
-
-    :returns: The ChatMessage.
-    """
-    text = "".join([chunk.content for chunk in chunks])
-    tool_calls = []
-
-    # Process tool calls if present in any chunk
-    tool_call_data: Dict[str, Dict[str, str]] = {}  # Track tool calls by index
-    for chunk_payload in chunks:
-        tool_calls_meta = chunk_payload.meta.get("tool_calls")
-        if tool_calls_meta is not None:
-            for delta in tool_calls_meta:
-                # We use the index of the tool call to track it across chunks since the ID is not always provided
-                if delta.index not in tool_call_data:
-                    tool_call_data[delta.index] = {"id": "", "name": "", "arguments": ""}
-
-                # Save the ID if present
-                if delta.id is not None:
-                    tool_call_data[delta.index]["id"] = delta.id
-
-                if delta.function is not None:
-                    if delta.function.name is not None:
-                        tool_call_data[delta.index]["name"] += delta.function.name
-                    if delta.function.arguments is not None:
-                        tool_call_data[delta.index]["arguments"] += delta.function.arguments
-
-    # Convert accumulated tool call data into ToolCall objects
-    for call_data in tool_call_data.values():
-        try:
-            arguments = json.loads(call_data["arguments"])
-            tool_calls.append(ToolCall(id=call_data["id"], tool_name=call_data["name"], arguments=arguments))
-        except json.JSONDecodeError:
-            logger.warning(
-                "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
-                "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
-                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
-                _id=call_data["id"],
-                _name=call_data["name"],
-                _arguments=call_data["arguments"],
-            )
-
-    # finish_reason can appear in different places so we look for the last one
-    finish_reasons = [
-        chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None
-    ]
-    finish_reason = finish_reasons[-1] if finish_reasons else None
-
-    meta = {
-        "model": chunks[-1].meta.get("model"),
-        "index": 0,
-        "finish_reason": finish_reason,
-        "completion_start_time": chunks[0].meta.get("received_at"),  # first chunk received
-        "usage": chunks[-1].meta.get("usage"),  # last chunk has the final usage data if available
-    }
-
-    return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
-
-
 def _convert_chat_completion_to_chat_message(completion: ChatCompletion, choice: Choice) -> ChatMessage:
     """
     Converts the non-streaming response from the OpenAI API to a ChatMessage.

View File

@@ -7,7 +7,7 @@ from datetime import datetime
 from typing import Any, Dict, Iterable, List, Optional, Union, cast
 
 from haystack import component, default_from_dict, default_to_dict
-from haystack.dataclasses import StreamingCallbackT, StreamingChunk, select_streaming_callback
+from haystack.dataclasses import ComponentInfo, StreamingCallbackT, StreamingChunk, select_streaming_callback
 from haystack.lazy_imports import LazyImport
 from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
 from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
@@ -220,6 +220,7 @@ class HuggingFaceAPIGenerator:
         chunks: List[StreamingChunk] = []
         first_chunk_time = None
 
+        component_info = ComponentInfo.from_component(self)
         for chunk in hf_output:
             token: TextGenerationStreamOutputToken = chunk.token
             if token.special:
@@ -229,7 +230,7 @@ class HuggingFaceAPIGenerator:
             if first_chunk_time is None:
                 first_chunk_time = datetime.now().isoformat()
 
-            stream_chunk = StreamingChunk(token.text, chunk_metadata)
+            stream_chunk = StreamingChunk(content=token.text, meta=chunk_metadata, component_info=component_info)
             chunks.append(stream_chunk)
             streaming_callback(stream_chunk)

View File

@@ -5,7 +5,7 @@
 from typing import Any, Dict, List, Literal, Optional, cast
 
 from haystack import component, default_from_dict, default_to_dict, logging
-from haystack.dataclasses import StreamingCallbackT, select_streaming_callback
+from haystack.dataclasses import ComponentInfo, StreamingCallbackT, select_streaming_callback
 from haystack.lazy_imports import LazyImport
 from haystack.utils import (
     ComponentDevice,
@@ -256,9 +256,10 @@ class HuggingFaceLocalGenerator:
             updated_generation_kwargs["num_return_sequences"] = 1
             # streamer parameter hooks into HF streaming, HFTokenStreamingHandler is an adapter to our streaming
             updated_generation_kwargs["streamer"] = HFTokenStreamingHandler(
-                self.pipeline.tokenizer,  # type: ignore
-                streaming_callback,
-                self.stop_words,  # type: ignore
+                tokenizer=self.pipeline.tokenizer,  # type: ignore
+                stream_handler=streaming_callback,
+                stop_words=self.stop_words,  # type: ignore
+                component_info=ComponentInfo.from_component(self),
             )
 
         output = self.pipeline(prompt, stopping_criteria=self.stopping_criteria_list, **updated_generation_kwargs)  # type: ignore

View File

@@ -13,8 +13,8 @@ from haystack.components.generators.chat.openai import (
     _check_finish_reason,
     _convert_chat_completion_chunk_to_streaming_chunk,
     _convert_chat_completion_to_chat_message,
-    _convert_streaming_chunks_to_chat_message,
 )
+from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
 from haystack.dataclasses import (
     ChatMessage,
     ComponentInfo,

View File

@@ -2,11 +2,15 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Any, Dict
+import json
+from typing import Any, Dict, List
 
 from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
 
-from haystack.dataclasses import StreamingChunk
+from haystack import logging
+from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall
+
+logger = logging.getLogger(__name__)
 
 
 def print_streaming_chunk(chunk: StreamingChunk) -> None:
@@ -53,3 +57,66 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
     # This ensures spacing between multiple LLM messages (e.g. Agent)
     if chunk.meta.get("finish_reason") is not None:
         print("\n\n", flush=True, end="")
+
+
+def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
+    """
+    Connects the streaming chunks into a single ChatMessage.
+
+    :param chunks: The list of all `StreamingChunk` objects.
+
+    :returns: The ChatMessage.
+    """
+    text = "".join([chunk.content for chunk in chunks])
+    tool_calls = []
+
+    # Process tool calls if present in any chunk
+    tool_call_data: Dict[str, Dict[str, str]] = {}  # Track tool calls by index
+    for chunk_payload in chunks:
+        tool_calls_meta = chunk_payload.meta.get("tool_calls")
+        if tool_calls_meta is not None:
+            for delta in tool_calls_meta:
+                # We use the index of the tool call to track it across chunks since the ID is not always provided
+                if delta.index not in tool_call_data:
+                    tool_call_data[delta.index] = {"id": "", "name": "", "arguments": ""}
+
+                # Save the ID if present
+                if delta.id is not None:
+                    tool_call_data[delta.index]["id"] = delta.id
+
+                if delta.function is not None:
+                    if delta.function.name is not None:
+                        tool_call_data[delta.index]["name"] += delta.function.name
+                    if delta.function.arguments is not None:
+                        tool_call_data[delta.index]["arguments"] += delta.function.arguments
+
+    # Convert accumulated tool call data into ToolCall objects
+    for call_data in tool_call_data.values():
+        try:
+            arguments = json.loads(call_data["arguments"])
+            tool_calls.append(ToolCall(id=call_data["id"], tool_name=call_data["name"], arguments=arguments))
+        except json.JSONDecodeError:
+            logger.warning(
+                "OpenAI returned a malformed JSON string for tool call arguments. This tool call "
+                "will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
+                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
+                _id=call_data["id"],
+                _name=call_data["name"],
+                _arguments=call_data["arguments"],
+            )
+
+    # finish_reason can appear in different places so we look for the last one
+    finish_reasons = [
+        chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None
+    ]
+    finish_reason = finish_reasons[-1] if finish_reasons else None
+
+    meta = {
+        "model": chunks[-1].meta.get("model"),
+        "index": 0,
+        "finish_reason": finish_reason,
+        "completion_start_time": chunks[0].meta.get("received_at"),  # first chunk received
+        "usage": chunks[-1].meta.get("usage"),  # last chunk has the final usage data if available
+    }
+
+    return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)

View File

@@ -0,0 +1,5 @@
+---
+enhancements:
+  - |
+    Refactors the HuggingFaceAPIChatGenerator to use the util method `_convert_streaming_chunks_to_chat_message`. This keeps the way StreamingChunks are converted into a final ChatMessage consistent across generators.
+    We also add ComponentInfo to the StreamingChunks created in `HuggingFaceGenerator` and `HuggingFaceLocalGenerator`, so we can tell which component a stream is coming from.
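For reference, the refactored streaming path in HuggingFaceAPIChatGenerator now follows the shape sketched below. This is a minimal illustration assembled from the diff above, not library code: `collect_replies` is a hypothetical helper, and `api_output` / `streaming_callback` stand in for the generator's real client output and callback.

from haystack.components.generators.chat.hugging_face_api import (
    HuggingFaceAPIChatGenerator,
    _convert_chat_completion_stream_output_to_streaming_chunk,
)
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import ComponentInfo


def collect_replies(generator: HuggingFaceAPIChatGenerator, api_output, streaming_callback):
    # Record which component the stream originates from; this is attached to every chunk.
    component_info = ComponentInfo.from_component(generator)

    streaming_chunks = []
    for chunk in api_output:
        # Normalize each Hugging Face ChatCompletionStreamOutput into a StreamingChunk
        # and forward it to the user-supplied callback as it arrives.
        streaming_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
            chunk=chunk, component_info=component_info
        )
        streaming_chunks.append(streaming_chunk)
        streaming_callback(streaming_chunk)

    # Merge all chunks into one ChatMessage with the shared util, defaulting usage if absent.
    message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
    if message.meta.get("usage") is None:
        message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
    return {"replies": [message]}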

View File

@@ -24,6 +24,7 @@ from huggingface_hub import (
     ChatCompletionStreamOutputChoice,
     ChatCompletionStreamOutputDelta,
     ChatCompletionInputStreamOptions,
+    ChatCompletionStreamOutputUsage,
 )
 from huggingface_hub.errors import RepositoryNotFoundError
@@ -31,6 +32,7 @@ from haystack.components.generators.chat.hugging_face_api import (
     HuggingFaceAPIChatGenerator,
     _convert_hfapi_tool_calls,
     _convert_tools_to_hfapi_tools,
+    _convert_chat_completion_stream_output_to_streaming_chunk,
 )
 
 from haystack.tools import Tool
@@ -661,6 +663,80 @@ class TestHuggingFaceAPIChatGenerator:
         tool_calls = _convert_hfapi_tool_calls(hfapi_tool_calls)
         assert len(tool_calls) == 0
 
+    @pytest.mark.parametrize(
+        "hf_stream_output, expected_stream_chunk",
+        [
+            (
+                ChatCompletionStreamOutput(
+                    choices=[
+                        ChatCompletionStreamOutputChoice(
+                            delta=ChatCompletionStreamOutputDelta(role="assistant", content=" Paris"), index=0
+                        )
+                    ],
+                    created=1748339326,
+                    id="",
+                    model="microsoft/Phi-3.5-mini-instruct",
+                    system_fingerprint="3.2.1-sha-4d28897",
+                ),
+                StreamingChunk(
+                    content=" Paris",
+                    meta={
+                        "received_at": "2025-05-27T12:14:28.228852",
+                        "model": "microsoft/Phi-3.5-mini-instruct",
+                        "finish_reason": None,
+                    },
+                ),
+            ),
+            (
+                ChatCompletionStreamOutput(
+                    choices=[
+                        ChatCompletionStreamOutputChoice(
+                            delta=ChatCompletionStreamOutputDelta(role="assistant", content=""),
+                            index=0,
+                            finish_reason="stop",
+                        )
+                    ],
+                    created=1748339326,
+                    id="",
+                    model="microsoft/Phi-3.5-mini-instruct",
+                    system_fingerprint="3.2.1-sha-4d28897",
+                ),
+                StreamingChunk(
+                    content="",
+                    meta={
+                        "received_at": "2025-05-27T12:14:28.228852",
+                        "model": "microsoft/Phi-3.5-mini-instruct",
+                        "finish_reason": "stop",
+                    },
+                ),
+            ),
+            (
+                ChatCompletionStreamOutput(
+                    choices=[],
+                    created=1748339326,
+                    id="",
+                    model="microsoft/Phi-3.5-mini-instruct",
+                    system_fingerprint="3.2.1-sha-4d28897",
+                    usage=ChatCompletionStreamOutputUsage(completion_tokens=2, prompt_tokens=21, total_tokens=23),
+                ),
+                StreamingChunk(
+                    content="",
+                    meta={
+                        "received_at": "2025-05-27T12:14:28.228852",
+                        "model": "microsoft/Phi-3.5-mini-instruct",
+                        "usage": {"completion_tokens": 2, "prompt_tokens": 21},
+                    },
+                ),
+            ),
+        ],
+    )
+    def test_convert_chat_completion_stream_output_to_streaming_chunk(self, hf_stream_output, expected_stream_chunk):
+        converted_stream_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(chunk=hf_stream_output)
+        # Remove timestamp from comparison since it's always the current time
+        converted_stream_chunk.meta.pop("received_at", None)
+        expected_stream_chunk.meta.pop("received_at", None)
+        assert converted_stream_chunk == expected_stream_chunk
+
     @pytest.mark.integration
     @pytest.mark.slow
     @pytest.mark.skipif(

View File

@@ -20,14 +20,13 @@ from openai.types.chat import chat_completion_chunk
 
 from haystack import component
 from haystack.components.generators.utils import print_streaming_chunk
-from haystack.dataclasses import StreamingChunk, ComponentInfo
+from haystack.dataclasses import StreamingChunk
 from haystack.utils.auth import Secret
 from haystack.dataclasses import ChatMessage, ToolCall
 from haystack.tools import ComponentTool, Tool
 from haystack.components.generators.chat.openai import (
     OpenAIChatGenerator,
     _check_finish_reason,
-    _convert_streaming_chunks_to_chat_message,
     _convert_chat_completion_chunk_to_streaming_chunk,
 )
 from haystack.tools.toolset import Toolset
@@ -598,309 +597,6 @@ class TestOpenAIChatGenerator:
         assert message.meta["finish_reason"] == "tool_calls"
         assert message.meta["usage"]["completion_tokens"] == 47
def test_convert_streaming_chunks_to_chat_message_tool_calls_in_any_chunk(self):
chunk = chat_completion_chunk.ChatCompletionChunk(
id="chatcmpl-B2g1XYv1WzALulC5c8uLtJgvEB48I",
choices=[
chat_completion_chunk.Choice(
delta=chat_completion_chunk.ChoiceDelta(
content=None, function_call=None, refusal=None, role=None, tool_calls=None
),
finish_reason="tool_calls",
index=0,
logprobs=None,
)
],
created=1739977895,
model="gpt-4o-mini-2024-07-18",
object="chat.completion.chunk",
service_tier="default",
system_fingerprint="fp_00428b782a",
usage=None,
)
chunks = [
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.910076",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id="call_ZOj5l67zhZOx6jqjg7ATQwb6",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="rag_pipeline_tool"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.913919",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"qu', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.914439",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ery":', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924146",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments=' "Wher', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924420",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="e do", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944398",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="es Ma", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944958",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="rk liv", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.945507",
},
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='e?"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946018",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id="call_STxsYY69wVOvxWqopAt3uWTB",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="get_weather"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946578",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"ci', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946981",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ty": ', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947411",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='"Berli', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947643",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='n"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947939",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": "tool_calls",
"received_at": "2025-02-19T16:02:55.948772",
},
component_info=ComponentInfo(name="test", type="test"),
),
]
# Convert chunks to a chat message
result = _convert_streaming_chunks_to_chat_message(chunks=chunks)
assert not result.texts
assert not result.text
# Verify both tool calls were found and processed
assert len(result.tool_calls) == 2
assert result.tool_calls[0].id == "call_ZOj5l67zhZOx6jqjg7ATQwb6"
assert result.tool_calls[0].tool_name == "rag_pipeline_tool"
assert result.tool_calls[0].arguments == {"query": "Where does Mark live?"}
assert result.tool_calls[1].id == "call_STxsYY69wVOvxWqopAt3uWTB"
assert result.tool_calls[1].tool_name == "get_weather"
assert result.tool_calls[1].arguments == {"city": "Berlin"}
# Verify meta information
assert result.meta["model"] == "gpt-4o-mini-2024-07-18"
assert result.meta["finish_reason"] == "tool_calls"
assert result.meta["index"] == 0
assert result.meta["completion_start_time"] == "2025-02-19T16:02:55.910076"
    def test_convert_usage_chunk_to_streaming_chunk(self):
        chunk = ChatCompletionChunk(
            id="chatcmpl-BC1y4wqIhe17R8sv3lgLcWlB4tXCw",

View File

@@ -0,0 +1,291 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from openai.types.chat import chat_completion_chunk
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import ComponentInfo, StreamingChunk
def test_convert_streaming_chunks_to_chat_message_tool_calls_in_any_chunk():
chunks = [
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.910076",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id="call_ZOj5l67zhZOx6jqjg7ATQwb6",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="rag_pipeline_tool"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.913919",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"qu', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.914439",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ery":', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924146",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments=' "Wher', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924420",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="e do", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944398",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="es Ma", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944958",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="rk liv", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.945507",
},
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='e?"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946018",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id="call_STxsYY69wVOvxWqopAt3uWTB",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="", name="get_weather"),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946578",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"ci', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946981",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ty": ', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947411",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='"Berli', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947643",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='n"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947939",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": "tool_calls",
"received_at": "2025-02-19T16:02:55.948772",
},
component_info=ComponentInfo(name="test", type="test"),
),
]
# Convert chunks to a chat message
result = _convert_streaming_chunks_to_chat_message(chunks=chunks)
assert not result.texts
assert not result.text
# Verify both tool calls were found and processed
assert len(result.tool_calls) == 2
assert result.tool_calls[0].id == "call_ZOj5l67zhZOx6jqjg7ATQwb6"
assert result.tool_calls[0].tool_name == "rag_pipeline_tool"
assert result.tool_calls[0].arguments == {"query": "Where does Mark live?"}
assert result.tool_calls[1].id == "call_STxsYY69wVOvxWqopAt3uWTB"
assert result.tool_calls[1].tool_name == "get_weather"
assert result.tool_calls[1].arguments == {"city": "Berlin"}
# Verify meta information
assert result.meta["model"] == "gpt-4o-mini-2024-07-18"
assert result.meta["finish_reason"] == "tool_calls"
assert result.meta["index"] == 0
assert result.meta["completion_start_time"] == "2025-02-19T16:02:55.910076"