diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index d48509637..c5f6f83d2 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -401,6 +401,7 @@ class HuggingFaceAPIChatGenerator: generated_text = "" first_chunk_time = None + meta: Dict[str, Any] = {} for chunk in api_output: # n is unused, so the API always returns only one choice @@ -412,8 +413,6 @@ class HuggingFaceAPIChatGenerator: generated_text += text finish_reason = choice.finish_reason - - meta: Dict[str, Any] = {} if finish_reason: meta["finish_reason"] = finish_reason @@ -426,7 +425,6 @@ class HuggingFaceAPIChatGenerator: meta.update( { "model": self._client.model, - "finish_reason": finish_reason, "index": 0, "usage": {"prompt_tokens": 0, "completion_tokens": 0}, # not available in streaming "completion_start_time": first_chunk_time, @@ -434,7 +432,6 @@ class HuggingFaceAPIChatGenerator: ) message = ChatMessage.from_assistant(text=generated_text, meta=meta) - return {"replies": [message]} def _run_non_streaming( @@ -485,6 +482,7 @@ class HuggingFaceAPIChatGenerator: generated_text = "" first_chunk_time = None + meta: Dict[str, Any] = {} async for chunk in api_output: choice = chunk.choices[0] @@ -493,8 +491,6 @@ class HuggingFaceAPIChatGenerator: generated_text += text finish_reason = choice.finish_reason - - meta: Dict[str, Any] = {} if finish_reason: meta["finish_reason"] = finish_reason @@ -507,7 +503,6 @@ class HuggingFaceAPIChatGenerator: meta.update( { "model": self._async_client.model, - "finish_reason": finish_reason, "index": 0, "usage": {"prompt_tokens": 0, "completion_tokens": 0}, "completion_start_time": first_chunk_time, diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 64d66c0f9..f376886c5 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -671,9 +671,15 @@ class TestHuggingFaceAPIChatGenerator: assert isinstance(response["replies"], list) assert len(response["replies"]) > 0 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - assert "usage" in response["replies"][0].meta - assert "prompt_tokens" in response["replies"][0].meta["usage"] - assert "completion_tokens" in response["replies"][0].meta["usage"] + assert response["replies"][0].text is not None + meta = response["replies"][0].meta + assert "usage" in meta + assert "prompt_tokens" in meta["usage"] + assert meta["usage"]["prompt_tokens"] > 0 + assert "completion_tokens" in meta["usage"] + assert meta["usage"]["completion_tokens"] > 0 + assert meta["model"] == "microsoft/Phi-3.5-mini-instruct" + assert meta["finish_reason"] is not None @pytest.mark.integration @pytest.mark.slow @@ -701,13 +707,18 @@ class TestHuggingFaceAPIChatGenerator: assert isinstance(response["replies"], list) assert len(response["replies"]) > 0 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] + assert response["replies"][0].text is not None response_meta = response["replies"][0].meta assert "completion_start_time" in response_meta assert datetime.fromisoformat(response_meta["completion_start_time"]) <= datetime.now() assert "usage" in response_meta assert "prompt_tokens" in response_meta["usage"] + assert response_meta["usage"]["prompt_tokens"] == 0 assert "completion_tokens" in response_meta["usage"] + assert response_meta["usage"]["completion_tokens"] == 0 + assert response_meta["model"] == "microsoft/Phi-3.5-mini-instruct" + assert response_meta["finish_reason"] is not None @pytest.mark.integration @pytest.mark.slow @@ -926,9 +937,16 @@ class TestHuggingFaceAPIChatGenerator: assert isinstance(response["replies"], list) assert len(response["replies"]) > 0 assert [isinstance(reply, ChatMessage) for reply in response["replies"]] - assert "usage" in response["replies"][0].meta - assert "prompt_tokens" in response["replies"][0].meta["usage"] - assert "completion_tokens" in response["replies"][0].meta["usage"] + assert response["replies"][0].text is not None + + meta = response["replies"][0].meta + assert "usage" in meta + assert "prompt_tokens" in meta["usage"] + assert meta["usage"]["prompt_tokens"] > 0 + assert "completion_tokens" in meta["usage"] + assert meta["usage"]["completion_tokens"] > 0 + assert meta["model"] == "microsoft/Phi-3.5-mini-instruct" + assert meta["finish_reason"] is not None finally: await generator._async_client.close()