From af9aac2b998ba9cef8bb224cabac8aeb93a9b356 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com> Date: Mon, 11 Aug 2025 13:52:16 +0200 Subject: [PATCH] chore!: Update finish reason in output of `HuggingFaceAPIChatGenerator` to match between stream and non-stream modes (#9686) * Update finish reason * Fix unit test * Add reno * Update releasenotes/notes/update-finish-reason-hf-api-chat-gen-c700042a079733e8.yaml Co-authored-by: Amna Mubashar * Update async as well * Fix unit test --------- Co-authored-by: Amna Mubashar --- .../generators/chat/hugging_face_api.py | 16 ++++++++++++---- ...eason-hf-api-chat-gen-c700042a079733e8.yaml | 18 ++++++++++++++++++ .../generators/chat/test_hugging_face_api.py | 4 ++-- 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 releasenotes/notes/update-finish-reason-hf-api-chat-gen-c700042a079733e8.yaml diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index c12a5192b..6526fdad9 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -40,6 +40,7 @@ with LazyImport(message="Run 'pip install \"huggingface_hub[inference]>=0.27.0\" ChatCompletionInputStreamOptions, ChatCompletionInputTool, ChatCompletionOutput, + ChatCompletionOutputComplete, ChatCompletionOutputToolCall, ChatCompletionStreamOutput, ChatCompletionStreamOutputChoice, @@ -112,7 +113,9 @@ def _convert_tools_to_hfapi_tools( return hf_tools -def _map_hf_finish_reason_to_haystack(choice: "ChatCompletionStreamOutputChoice") -> Optional[FinishReason]: +def _map_hf_finish_reason_to_haystack( + choice: Union["ChatCompletionStreamOutputChoice", "ChatCompletionOutputComplete"], +) -> Optional[FinishReason]: """ Map HuggingFace finish reasons to Haystack FinishReason literals. @@ -133,7 +136,10 @@ def _map_hf_finish_reason_to_haystack(choice: "ChatCompletionStreamOutputChoice" return None # Check if this choice contains tool call information - has_tool_calls = choice.delta.tool_calls is not None or choice.delta.tool_call_id is not None + if isinstance(choice, ChatCompletionStreamOutputChoice): + has_tool_calls = choice.delta.tool_calls is not None or choice.delta.tool_call_id is not None + else: + has_tool_calls = choice.message.tool_calls is not None or choice.message.tool_call_id is not None # If we detect tool calls, override the finish reason if has_tool_calls: @@ -565,9 +571,10 @@ class HuggingFaceAPIChatGenerator: tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls) + mapped_finish_reason = _map_hf_finish_reason_to_haystack(choice) if choice.finish_reason else None meta: dict[str, Any] = { "model": self._client.model, - "finish_reason": choice.finish_reason, + "finish_reason": mapped_finish_reason, "index": choice.index, } @@ -629,9 +636,10 @@ class HuggingFaceAPIChatGenerator: tool_calls = _convert_hfapi_tool_calls(choice.message.tool_calls) + mapped_finish_reason = _map_hf_finish_reason_to_haystack(choice) if choice.finish_reason else None meta: dict[str, Any] = { "model": self._async_client.model, - "finish_reason": choice.finish_reason, + "finish_reason": mapped_finish_reason, "index": choice.index, } diff --git a/releasenotes/notes/update-finish-reason-hf-api-chat-gen-c700042a079733e8.yaml b/releasenotes/notes/update-finish-reason-hf-api-chat-gen-c700042a079733e8.yaml new file mode 100644 index 000000000..54c0438dd --- /dev/null +++ b/releasenotes/notes/update-finish-reason-hf-api-chat-gen-c700042a079733e8.yaml @@ -0,0 +1,18 @@ +--- +upgrade: + - | + The `finish_reason` field behavior in `HuggingFaceAPIChatGenerator` has been + updated. Previously, the new `finish_reason` mapping (introduced in Haystack 2.15.0 release) was only applied when streaming was enabled. When streaming was disabled, + the old `finish_reason` was still returned. This change ensures the updated + `finish_reason` values are consistently returned regardless of streaming mode. + + **How to know if you're affected:** + If you rely on `finish_reason` in responses from `HuggingFaceAPIChatGenerator` + with streaming disabled, you may see different values after this upgrade. + + **What to do:** + Review the updated mapping: + - `length` → `length` + - `eos_token` → `stop` + - `stop_sequence` → `stop` + - If tool calls are present → `tool_calls` diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 7f6e844aa..f61d9bf01 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -587,7 +587,7 @@ class TestHuggingFaceAPIChatGenerator: assert response["replies"][0].tool_calls[0].arguments == {"city": "Paris"} assert response["replies"][0].tool_calls[0].id == "0" assert response["replies"][0].meta == { - "finish_reason": "stop", + "finish_reason": "tool_calls", "index": 0, "model": "meta-llama/Llama-3.1-70B-Instruct", "usage": {"completion_tokens": 30, "prompt_tokens": 426}, @@ -1040,7 +1040,7 @@ class TestHuggingFaceAPIChatGenerator: assert response["replies"][0].tool_calls[0].arguments == {"city": "Paris"} assert response["replies"][0].tool_calls[0].id == "0" assert response["replies"][0].meta == { - "finish_reason": "stop", + "finish_reason": "tool_calls", "index": 0, "model": "meta-llama/Llama-3.1-70B-Instruct", "usage": {"completion_tokens": 30, "prompt_tokens": 426},