From 4a87ceb0ed25ff9979294ae356517570b03e0ecf Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 5 Mar 2025 15:53:26 +0100 Subject: [PATCH] Use Phi instead (#8982) --- .../generators/chat/test_hugging_face_api.py | 6 +++--- test/components/generators/test_hugging_face_api.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 872072aa8..abe3f2e26 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -570,7 +570,7 @@ class TestHuggingFaceAPIChatGenerator: def test_live_run_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, ) @@ -598,7 +598,7 @@ class TestHuggingFaceAPIChatGenerator: def test_live_run_serverless_streaming(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, streaming_callback=streaming_callback_handler, ) @@ -825,7 +825,7 @@ class TestHuggingFaceAPIChatGenerator: async def test_live_run_async_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, ) diff --git a/test/components/generators/test_hugging_face_api.py b/test/components/generators/test_hugging_face_api.py index 83fd12c48..8cf6d5b8a 100644 --- a/test/components/generators/test_hugging_face_api.py +++ 
b/test/components/generators/test_hugging_face_api.py @@ -298,14 +298,14 @@ class TestHuggingFaceAPIGenerator: def test_run_serverless(self): generator = HuggingFaceAPIGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_new_tokens": 20}, ) # You must include the instruction tokens in the prompt. HF does not add them automatically. # Without them the model will behave erratically. response = generator.run( - "[INST] What is the capital of France? Be concise only provide the capital, nothing else.[/INST]" + "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n" ) # Assert that the response contains the generated replies @@ -329,12 +329,14 @@ class TestHuggingFaceAPIGenerator: def test_live_run_streaming_check_completion_start_time(self): generator = HuggingFaceAPIGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "HuggingFaceH4/zephyr-7b-beta"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_new_tokens": 30}, streaming_callback=streaming_callback_handler, ) - results = generator.run("You are a helpful agent that answers questions. What is the capital of France?") + results = generator.run( + "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n" + ) # Assert that the response contains the generated replies assert "replies" in results