Improve flaky test and actually test reasoning (#10287)

This commit is contained in:
Sebastian Husch Lee 2025-12-30 10:26:01 +01:00 committed by GitHub
parent e86f016141
commit a249bd32ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1100,11 +1100,15 @@ class TestHuggingFaceAPIChatGenerator:
2. When the assistant message (with reasoning) is sent back in a multi-turn conversation,
the API call succeeds (reasoning is dropped during conversion since HF API doesn't support it)
"""
# Note: Using a model that supports reasoning. DeepSeek-R1-Distill models are available
# via serverless inference and support reasoning output.
# Note: Using a model that supports reasoning AND a provider that actually follows the spec defined in
# huggingface-hub. Reasoning content especially seems to be non-standard across providers and is either left
# in the main response or put in a new field that is not part of the official API.
# One combo that does respect the spec is together + openai/gpt-oss-20b.
# together + openai/gpt-oss-20b actually uses the expected reasoning field in the response
generator = HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
api_params={"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
# We use together + openai/gpt-oss-20b since it actually returns reasoning content in the expected field
api_params={"model": "openai/gpt-oss-20b", "provider": "together"},
generation_kwargs={"max_tokens": 300},
)
@ -1116,13 +1120,14 @@ class TestHuggingFaceAPIChatGenerator:
assert len(response["replies"]) > 0
first_reply = response["replies"][0]
assert first_reply.text is not None
assert first_reply.reasoning is not None
# Second turn: send a follow-up including the assistant's previous response
# This tests that convert_message_to_hf_format properly handles messages
# that may contain ReasoningContent (it should skip it)
follow_up_messages = [
ChatMessage.from_user("What is 2 + 2? Answer briefly."),
first_reply, # Include the assistant's response (may contain reasoning)
first_reply, # Include the assistant's response with reasoning
ChatMessage.from_user("Now what is 3 + 3? Answer briefly."),
]
follow_up_response = generator.run(messages=follow_up_messages)
@ -1131,6 +1136,7 @@ class TestHuggingFaceAPIChatGenerator:
assert "replies" in follow_up_response
assert len(follow_up_response["replies"]) > 0
assert follow_up_response["replies"][0].text is not None
assert follow_up_response["replies"][0].reasoning is not None
def test_hugging_face_api_generator_with_toolset_initialization(self, mock_check_valid_model, tools):
"""Test that the HuggingFaceAPIChatGenerator can be initialized with a Toolset."""