refactor: Refactor hf api chat generator (#9449)

* Refactor HFAPI Chat Generator

* Add component info to generators

* Fix type hint

* Add reno

* Fix unit tests

* Remove incorrect dev comment

* Move _convert_streaming_chunks_to_chat_message to utils file
Sebastian Husch Lee 2025-05-27 15:55:06 +02:00 committed by GitHub
parent 3deaa20cb6
commit 81c0cefa41
11 changed files with 504 additions and 464 deletions
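For orientation, a condensed sketch of the streaming flow this refactor introduces, assembled from the hunks below. The wrapper name collect_streamed_reply is illustrative only; in the component this logic lives in _run_streaming and _run_streaming_async.

# Sketch only: condensed from this diff, not a verbatim copy of the component code.
from haystack.components.generators.chat.hugging_face_api import (
    _convert_chat_completion_stream_output_to_streaming_chunk,
)
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message


def collect_streamed_reply(api_output, streaming_callback, component_info=None):
    """Turn an iterable of HF ChatCompletionStreamOutput chunks into a single ChatMessage reply."""
    streaming_chunks = []
    for chunk in api_output:
        # each HF chunk becomes a Haystack StreamingChunk carrying content, meta and component_info
        streaming_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
            chunk=chunk, component_info=component_info
        )
        streaming_chunks.append(streaming_chunk)
        streaming_callback(streaming_chunk)

    # the shared util folds the chunks into one ChatMessage (text, tool calls, finish_reason, usage)
    message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
    if message.meta.get("usage") is None:
        message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
    return {"replies": [message]}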

View File

@@ -7,6 +7,7 @@ from datetime import datetime
from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Union
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import ChatMessage, ComponentInfo, StreamingChunk, ToolCall, select_streaming_callback
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.lazy_imports import LazyImport
@@ -101,6 +102,35 @@ def _convert_tools_to_hfapi_tools(
return hf_tools
def _convert_chat_completion_stream_output_to_streaming_chunk(
chunk: "ChatCompletionStreamOutput", component_info: Optional[ComponentInfo] = None
) -> StreamingChunk:
"""
Converts the Hugging Face API ChatCompletionStreamOutput to a StreamingChunk.
"""
# Choices is empty when include_usage is set to True; that chunk only carries the usage information.
if len(chunk.choices) == 0:
usage = None
if chunk.usage:
usage = {"prompt_tokens": chunk.usage.prompt_tokens, "completion_tokens": chunk.usage.completion_tokens}
return StreamingChunk(
content="",
meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "usage": usage},
component_info=component_info,
)
# n is unused, so the API always returns only one choice
# the argument is probably allowed for compatibility with OpenAI
# see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
choice = chunk.choices[0]
stream_chunk = StreamingChunk(
content=choice.delta.content or "",
meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "finish_reason": choice.finish_reason},
component_info=component_info,
)
return stream_chunk
@component
class HuggingFaceAPIChatGenerator:
"""
@@ -403,55 +433,19 @@ class HuggingFaceAPIChatGenerator:
**generation_kwargs,
)
generated_text = ""
first_chunk_time = None
finish_reason = None
usage = None
meta: Dict[str, Any] = {}
# get the component name and type
component_info = ComponentInfo.from_component(self)
# Set up streaming handler
streaming_chunks = []
for chunk in api_output:
# The chunk with usage returns an empty array for choices
if len(chunk.choices) > 0:
# n is unused, so the API always returns only one choice
# the argument is probably allowed for compatibility with OpenAI
# see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
choice = chunk.choices[0]
streaming_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
chunk=chunk, component_info=component_info
)
streaming_chunks.append(streaming_chunk)
streaming_callback(streaming_chunk)
text = choice.delta.content or ""
generated_text += text
message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
if message.meta.get("usage") is None:
message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
if choice.finish_reason:
finish_reason = choice.finish_reason
stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
streaming_callback(stream_chunk)
if chunk.usage:
usage = chunk.usage
if first_chunk_time is None:
first_chunk_time = datetime.now().isoformat()
if usage:
usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
else:
usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}
meta.update(
{
"model": self._client.model,
"index": 0,
"finish_reason": finish_reason,
"usage": usage_dict,
"completion_start_time": first_chunk_time,
}
)
message = ChatMessage.from_assistant(text=generated_text, meta=meta)
return {"replies": [message]}
def _run_non_streaming(
@@ -503,51 +497,19 @@ class HuggingFaceAPIChatGenerator:
**generation_kwargs,
)
generated_text = ""
first_chunk_time = None
finish_reason = None
usage = None
meta: Dict[str, Any] = {}
# get the component name and type
component_info = ComponentInfo.from_component(self)
streaming_chunks = []
async for chunk in api_output:
# The chunk with usage returns an empty array for choices
if len(chunk.choices) > 0:
# n is unused, so the API always returns only one choice
# the argument is probably allowed for compatibility with OpenAI
# see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n
choice = chunk.choices[0]
stream_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(
chunk=chunk, component_info=component_info
)
streaming_chunks.append(stream_chunk)
await streaming_callback(stream_chunk) # type: ignore
text = choice.delta.content or ""
generated_text += text
message = _convert_streaming_chunks_to_chat_message(chunks=streaming_chunks)
if message.meta.get("usage") is None:
message.meta["usage"] = {"prompt_tokens": 0, "completion_tokens": 0}
stream_chunk = StreamingChunk(content=text, meta=meta, component_info=component_info)
await streaming_callback(stream_chunk) # type: ignore
if chunk.usage:
usage = chunk.usage
if first_chunk_time is None:
first_chunk_time = datetime.now().isoformat()
if usage:
usage_dict = {"prompt_tokens": usage.prompt_tokens, "completion_tokens": usage.completion_tokens}
else:
usage_dict = {"prompt_tokens": 0, "completion_tokens": 0}
meta.update(
{
"model": self._async_client.model,
"index": 0,
"finish_reason": finish_reason,
"usage": usage_dict,
"completion_start_time": first_chunk_time,
}
)
message = ChatMessage.from_assistant(text=generated_text, meta=meta)
return {"replies": [message]}
async def _run_non_streaming_async(

View File

@@ -389,7 +389,10 @@ class HuggingFaceLocalChatGenerator:
component_info = ComponentInfo.from_component(self)
# streamer parameter hooks into HF streaming, HFTokenStreamingHandler is an adapter to our streaming
generation_kwargs["streamer"] = HFTokenStreamingHandler(
tokenizer, streaming_callback, stop_words, component_info
tokenizer=tokenizer,
stream_handler=streaming_callback,
stop_words=stop_words,
component_info=component_info,
)
# convert messages to HF format

View File

@@ -13,6 +13,7 @@ from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import (
AsyncStreamingCallbackT,
ChatMessage,
@@ -455,69 +456,6 @@ def _check_finish_reason(meta: Dict[str, Any]) -> None:
)
def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
"""
Connects the streaming chunks into a single ChatMessage.
:param chunks: The list of all `StreamingChunk` objects.
:returns: The ChatMessage.
"""
text = "".join([chunk.content for chunk in chunks])
tool_calls = []
# Process tool calls if present in any chunk
tool_call_data: Dict[str, Dict[str, str]] = {} # Track tool calls by index
for chunk_payload in chunks:
tool_calls_meta = chunk_payload.meta.get("tool_calls")
if tool_calls_meta is not None:
for delta in tool_calls_meta:
# We use the index of the tool call to track it across chunks since the ID is not always provided
if delta.index not in tool_call_data:
tool_call_data[delta.index] = {"id": "", "name": "", "arguments": ""}
# Save the ID if present
if delta.id is not None:
tool_call_data[delta.index]["id"] = delta.id
if delta.function is not None:
if delta.function.name is not None:
tool_call_data[delta.index]["name"] += delta.function.name
if delta.function.arguments is not None:
tool_call_data[delta.index]["arguments"] += delta.function.arguments
# Convert accumulated tool call data into ToolCall objects
for call_data in tool_call_data.values():
try:
arguments = json.loads(call_data["arguments"])
tool_calls.append(ToolCall(id=call_data["id"], tool_name=call_data["name"], arguments=arguments))
except json.JSONDecodeError:
logger.warning(
"OpenAI returned a malformed JSON string for tool call arguments. This tool call "
"will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
"Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
_id=call_data["id"],
_name=call_data["name"],
_arguments=call_data["arguments"],
)
# finish_reason can appear in different places so we look for the last one
finish_reasons = [
chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None
]
finish_reason = finish_reasons[-1] if finish_reasons else None
meta = {
"model": chunks[-1].meta.get("model"),
"index": 0,
"finish_reason": finish_reason,
"completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
"usage": chunks[-1].meta.get("usage"), # last chunk has the final usage data if available
}
return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)
def _convert_chat_completion_to_chat_message(completion: ChatCompletion, choice: Choice) -> ChatMessage:
"""
Converts the non-streaming response from the OpenAI API to a ChatMessage.

View File

@@ -7,7 +7,7 @@ from datetime import datetime
from typing import Any, Dict, Iterable, List, Optional, Union, cast
from haystack import component, default_from_dict, default_to_dict
from haystack.dataclasses import StreamingCallbackT, StreamingChunk, select_streaming_callback
from haystack.dataclasses import ComponentInfo, StreamingCallbackT, StreamingChunk, select_streaming_callback
from haystack.lazy_imports import LazyImport
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable
from haystack.utils.hf import HFGenerationAPIType, HFModelType, check_valid_model
@@ -220,6 +220,7 @@ class HuggingFaceAPIGenerator:
chunks: List[StreamingChunk] = []
first_chunk_time = None
component_info = ComponentInfo.from_component(self)
for chunk in hf_output:
token: TextGenerationStreamOutputToken = chunk.token
if token.special:
@@ -229,7 +230,7 @@
if first_chunk_time is None:
first_chunk_time = datetime.now().isoformat()
stream_chunk = StreamingChunk(token.text, chunk_metadata)
stream_chunk = StreamingChunk(content=token.text, meta=chunk_metadata, component_info=component_info)
chunks.append(stream_chunk)
streaming_callback(stream_chunk)

View File

@@ -5,7 +5,7 @@
from typing import Any, Dict, List, Literal, Optional, cast
from haystack import component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import StreamingCallbackT, select_streaming_callback
from haystack.dataclasses import ComponentInfo, StreamingCallbackT, select_streaming_callback
from haystack.lazy_imports import LazyImport
from haystack.utils import (
ComponentDevice,
@@ -256,9 +256,10 @@ class HuggingFaceLocalGenerator:
updated_generation_kwargs["num_return_sequences"] = 1
# streamer parameter hooks into HF streaming, HFTokenStreamingHandler is an adapter to our streaming
updated_generation_kwargs["streamer"] = HFTokenStreamingHandler(
self.pipeline.tokenizer, # type: ignore
streaming_callback,
self.stop_words, # type: ignore
tokenizer=self.pipeline.tokenizer, # type: ignore
stream_handler=streaming_callback,
stop_words=self.stop_words, # type: ignore
component_info=ComponentInfo.from_component(self),
)
output = self.pipeline(prompt, stopping_criteria=self.stopping_criteria_list, **updated_generation_kwargs) # type: ignore

View File

@@ -13,8 +13,8 @@ from haystack.components.generators.chat.openai import (
_check_finish_reason,
_convert_chat_completion_chunk_to_streaming_chunk,
_convert_chat_completion_to_chat_message,
_convert_streaming_chunks_to_chat_message,
)
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import (
ChatMessage,
ComponentInfo,

View File

@@ -2,11 +2,15 @@
#
# SPDX-License-Identifier: Apache-2.0
from typing import Any, Dict
import json
from typing import Any, Dict, List
from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
from haystack.dataclasses import StreamingChunk
from haystack import logging
from haystack.dataclasses import ChatMessage, StreamingChunk, ToolCall
logger = logging.getLogger(__name__)
def print_streaming_chunk(chunk: StreamingChunk) -> None:
@@ -53,3 +57,66 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
# This ensures spacing between multiple LLM messages (e.g. Agent)
if chunk.meta.get("finish_reason") is not None:
print("\n\n", flush=True, end="")
def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> ChatMessage:
"""
Connects the streaming chunks into a single ChatMessage.
:param chunks: The list of all `StreamingChunk` objects.
:returns: The ChatMessage.
"""
text = "".join([chunk.content for chunk in chunks])
tool_calls = []
# Process tool calls if present in any chunk
tool_call_data: Dict[str, Dict[str, str]] = {} # Track tool calls by index
for chunk_payload in chunks:
tool_calls_meta = chunk_payload.meta.get("tool_calls")
if tool_calls_meta is not None:
for delta in tool_calls_meta:
# We use the index of the tool call to track it across chunks since the ID is not always provided
if delta.index not in tool_call_data:
tool_call_data[delta.index] = {"id": "", "name": "", "arguments": ""}
# Save the ID if present
if delta.id is not None:
tool_call_data[delta.index]["id"] = delta.id
if delta.function is not None:
if delta.function.name is not None:
tool_call_data[delta.index]["name"] += delta.function.name
if delta.function.arguments is not None:
tool_call_data[delta.index]["arguments"] += delta.function.arguments
# Convert accumulated tool call data into ToolCall objects
for call_data in tool_call_data.values():
try:
arguments = json.loads(call_data["arguments"])
tool_calls.append(ToolCall(id=call_data["id"], tool_name=call_data["name"], arguments=arguments))
except json.JSONDecodeError:
logger.warning(
"OpenAI returned a malformed JSON string for tool call arguments. This tool call "
"will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
"Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
_id=call_data["id"],
_name=call_data["name"],
_arguments=call_data["arguments"],
)
# finish_reason can appear in different places so we look for the last one
finish_reasons = [
chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None
]
finish_reason = finish_reasons[-1] if finish_reasons else None
meta = {
"model": chunks[-1].meta.get("model"),
"index": 0,
"finish_reason": finish_reason,
"completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
"usage": chunks[-1].meta.get("usage"), # last chunk has the final usage data if available
}
return ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, meta=meta)

View File

@@ -0,0 +1,5 @@
---
enhancements:
- |
Refactors the HuggingFaceAPIChatGenerator to use the util method `_convert_streaming_chunks_to_chat_message`. This keeps the way we convert StreamingChunks into a final ChatMessage consistent across generators.
We also add ComponentInfo to the StreamingChunks made in `HuggingFaceGenerator` and `HuggingFaceLocalGenerator` so we can tell which component a stream is coming from.
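A minimal sketch of what the ComponentInfo addition enables (the callback name print_chunk_with_source is illustrative and not part of this commit):

from haystack.dataclasses import StreamingChunk


def print_chunk_with_source(chunk: StreamingChunk) -> None:
    # component_info holds the emitting component's name and type; it may be unset for
    # components that do not populate it, so guard before using it.
    source = chunk.component_info.type if chunk.component_info else "unknown"
    print(f"[{source}] {chunk.content}", end="", flush=True)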

View File

@@ -24,6 +24,7 @@ from huggingface_hub import (
ChatCompletionStreamOutputChoice,
ChatCompletionStreamOutputDelta,
ChatCompletionInputStreamOptions,
ChatCompletionStreamOutputUsage,
)
from huggingface_hub.errors import RepositoryNotFoundError
@@ -31,6 +32,7 @@ from haystack.components.generators.chat.hugging_face_api import (
HuggingFaceAPIChatGenerator,
_convert_hfapi_tool_calls,
_convert_tools_to_hfapi_tools,
_convert_chat_completion_stream_output_to_streaming_chunk,
)
from haystack.tools import Tool
@@ -661,6 +663,80 @@ class TestHuggingFaceAPIChatGenerator:
tool_calls = _convert_hfapi_tool_calls(hfapi_tool_calls)
assert len(tool_calls) == 0
@pytest.mark.parametrize(
"hf_stream_output, expected_stream_chunk",
[
(
ChatCompletionStreamOutput(
choices=[
ChatCompletionStreamOutputChoice(
delta=ChatCompletionStreamOutputDelta(role="assistant", content=" Paris"), index=0
)
],
created=1748339326,
id="",
model="microsoft/Phi-3.5-mini-instruct",
system_fingerprint="3.2.1-sha-4d28897",
),
StreamingChunk(
content=" Paris",
meta={
"received_at": "2025-05-27T12:14:28.228852",
"model": "microsoft/Phi-3.5-mini-instruct",
"finish_reason": None,
},
),
),
(
ChatCompletionStreamOutput(
choices=[
ChatCompletionStreamOutputChoice(
delta=ChatCompletionStreamOutputDelta(role="assistant", content=""),
index=0,
finish_reason="stop",
)
],
created=1748339326,
id="",
model="microsoft/Phi-3.5-mini-instruct",
system_fingerprint="3.2.1-sha-4d28897",
),
StreamingChunk(
content="",
meta={
"received_at": "2025-05-27T12:14:28.228852",
"model": "microsoft/Phi-3.5-mini-instruct",
"finish_reason": "stop",
},
),
),
(
ChatCompletionStreamOutput(
choices=[],
created=1748339326,
id="",
model="microsoft/Phi-3.5-mini-instruct",
system_fingerprint="3.2.1-sha-4d28897",
usage=ChatCompletionStreamOutputUsage(completion_tokens=2, prompt_tokens=21, total_tokens=23),
),
StreamingChunk(
content="",
meta={
"received_at": "2025-05-27T12:14:28.228852",
"model": "microsoft/Phi-3.5-mini-instruct",
"usage": {"completion_tokens": 2, "prompt_tokens": 21},
},
),
),
],
)
def test_convert_chat_completion_stream_output_to_streaming_chunk(self, hf_stream_output, expected_stream_chunk):
converted_stream_chunk = _convert_chat_completion_stream_output_to_streaming_chunk(chunk=hf_stream_output)
# Remove timestamp from comparison since it's always the current time
converted_stream_chunk.meta.pop("received_at", None)
expected_stream_chunk.meta.pop("received_at", None)
assert converted_stream_chunk == expected_stream_chunk
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(

View File

@@ -20,14 +20,13 @@ from openai.types.chat import chat_completion_chunk
from haystack import component
from haystack.components.generators.utils import print_streaming_chunk
from haystack.dataclasses import StreamingChunk, ComponentInfo
from haystack.dataclasses import StreamingChunk
from haystack.utils.auth import Secret
from haystack.dataclasses import ChatMessage, ToolCall
from haystack.tools import ComponentTool, Tool
from haystack.components.generators.chat.openai import (
OpenAIChatGenerator,
_check_finish_reason,
_convert_streaming_chunks_to_chat_message,
_convert_chat_completion_chunk_to_streaming_chunk,
)
from haystack.tools.toolset import Toolset
@@ -598,309 +597,6 @@ class TestOpenAIChatGenerator:
assert message.meta["finish_reason"] == "tool_calls"
assert message.meta["usage"]["completion_tokens"] == 47
def test_convert_streaming_chunks_to_chat_message_tool_calls_in_any_chunk(self):
chunk = chat_completion_chunk.ChatCompletionChunk(
id="chatcmpl-B2g1XYv1WzALulC5c8uLtJgvEB48I",
choices=[
chat_completion_chunk.Choice(
delta=chat_completion_chunk.ChoiceDelta(
content=None, function_call=None, refusal=None, role=None, tool_calls=None
),
finish_reason="tool_calls",
index=0,
logprobs=None,
)
],
created=1739977895,
model="gpt-4o-mini-2024-07-18",
object="chat.completion.chunk",
service_tier="default",
system_fingerprint="fp_00428b782a",
usage=None,
)
chunks = [
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.910076",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id="call_ZOj5l67zhZOx6jqjg7ATQwb6",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="rag_pipeline_tool"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.913919",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"qu', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.914439",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ery":', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924146",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments=' "Wher', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924420",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="e do", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944398",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="es Ma", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944958",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="rk liv", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.945507",
},
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='e?"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946018",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id="call_STxsYY69wVOvxWqopAt3uWTB",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="get_weather"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946578",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"ci', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946981",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ty": ', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947411",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='"Berli', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947643",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='n"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947939",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": "tool_calls",
"received_at": "2025-02-19T16:02:55.948772",
},
component_info=ComponentInfo(name="test", type="test"),
),
]
# Convert chunks to a chat message
result = _convert_streaming_chunks_to_chat_message(chunks=chunks)
assert not result.texts
assert not result.text
# Verify both tool calls were found and processed
assert len(result.tool_calls) == 2
assert result.tool_calls[0].id == "call_ZOj5l67zhZOx6jqjg7ATQwb6"
assert result.tool_calls[0].tool_name == "rag_pipeline_tool"
assert result.tool_calls[0].arguments == {"query": "Where does Mark live?"}
assert result.tool_calls[1].id == "call_STxsYY69wVOvxWqopAt3uWTB"
assert result.tool_calls[1].tool_name == "get_weather"
assert result.tool_calls[1].arguments == {"city": "Berlin"}
# Verify meta information
assert result.meta["model"] == "gpt-4o-mini-2024-07-18"
assert result.meta["finish_reason"] == "tool_calls"
assert result.meta["index"] == 0
assert result.meta["completion_start_time"] == "2025-02-19T16:02:55.910076"
def test_convert_usage_chunk_to_streaming_chunk(self):
chunk = ChatCompletionChunk(
id="chatcmpl-BC1y4wqIhe17R8sv3lgLcWlB4tXCw",

View File

@@ -0,0 +1,291 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
from openai.types.chat import chat_completion_chunk
from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
from haystack.dataclasses import ComponentInfo, StreamingChunk
def test_convert_streaming_chunks_to_chat_message_tool_calls_in_any_chunk():
chunks = [
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.910076",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id="call_ZOj5l67zhZOx6jqjg7ATQwb6",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(
arguments="", name="rag_pipeline_tool"
),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.913919",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"qu', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.914439",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ery":', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924146",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments=' "Wher', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.924420",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="e do", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944398",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="es Ma", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.944958",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="rk liv", name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.945507",
},
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=0,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='e?"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946018",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id="call_STxsYY69wVOvxWqopAt3uWTB",
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments="", name="get_weather"),
type="function",
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946578",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='{"ci', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.946981",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='ty": ', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947411",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='"Berli', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947643",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": [
chat_completion_chunk.ChoiceDeltaToolCall(
index=1,
id=None,
function=chat_completion_chunk.ChoiceDeltaToolCallFunction(arguments='n"}', name=None),
type=None,
)
],
"finish_reason": None,
"received_at": "2025-02-19T16:02:55.947939",
},
component_info=ComponentInfo(name="test", type="test"),
),
StreamingChunk(
content="",
meta={
"model": "gpt-4o-mini-2024-07-18",
"index": 0,
"tool_calls": None,
"finish_reason": "tool_calls",
"received_at": "2025-02-19T16:02:55.948772",
},
component_info=ComponentInfo(name="test", type="test"),
),
]
# Convert chunks to a chat message
result = _convert_streaming_chunks_to_chat_message(chunks=chunks)
assert not result.texts
assert not result.text
# Verify both tool calls were found and processed
assert len(result.tool_calls) == 2
assert result.tool_calls[0].id == "call_ZOj5l67zhZOx6jqjg7ATQwb6"
assert result.tool_calls[0].tool_name == "rag_pipeline_tool"
assert result.tool_calls[0].arguments == {"query": "Where does Mark live?"}
assert result.tool_calls[1].id == "call_STxsYY69wVOvxWqopAt3uWTB"
assert result.tool_calls[1].tool_name == "get_weather"
assert result.tool_calls[1].arguments == {"city": "Berlin"}
# Verify meta information
assert result.meta["model"] == "gpt-4o-mini-2024-07-18"
assert result.meta["finish_reason"] == "tool_calls"
assert result.meta["index"] == 0
assert result.meta["completion_start_time"] == "2025-02-19T16:02:55.910076"