diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
index abe3f2e26..dc0f8c706 100644
--- a/test/components/generators/chat/test_hugging_face_api.py
+++ b/test/components/generators/chat/test_hugging_face_api.py
@@ -822,6 +822,7 @@ class TestHuggingFaceAPIChatGenerator:
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
     @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    @pytest.mark.asyncio
     async def test_live_run_async_serverless(self):
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -829,17 +830,18 @@ class TestHuggingFaceAPIChatGenerator:
             generation_kwargs={"max_tokens": 20},
         )
 
-        # No need for instruction tokens here since we use the chat_completion endpoint which handles the chat
-        # templating for us.
         messages = [
             ChatMessage.from_user("What is the capital of France? Be concise only provide the capital, nothing else.")
         ]
-        response = await generator.run_async(messages=messages)
+        try:
+            response = await generator.run_async(messages=messages)
 
-        assert "replies" in response
-        assert isinstance(response["replies"], list)
-        assert len(response["replies"]) > 0
-        assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
-        assert "usage" in response["replies"][0].meta
-        assert "prompt_tokens" in response["replies"][0].meta["usage"]
-        assert "completion_tokens" in response["replies"][0].meta["usage"]
+            assert "replies" in response
+            assert isinstance(response["replies"], list)
+            assert len(response["replies"]) > 0
+            assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
+            assert "usage" in response["replies"][0].meta
+            assert "prompt_tokens" in response["replies"][0].meta["usage"]
+            assert "completion_tokens" in response["replies"][0].meta["usage"]
+        finally:
+            await generator._async_client.close()