From 4a87ceb0ed25ff9979294ae356517570b03e0ecf Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 5 Mar 2025 15:53:26 +0100 Subject: [PATCH] Use Phi instead (#8982) --- .../generators/chat/test_hugging_face_api.py | 6 +++--- test/components/generators/test_hugging_face_api.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 872072aa8..abe3f2e26 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -570,7 +570,7 @@ class TestHuggingFaceAPIChatGenerator: def test_live_run_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, ) @@ -598,7 +598,7 @@ class TestHuggingFaceAPIChatGenerator: def test_live_run_serverless_streaming(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, streaming_callback=streaming_callback_handler, ) @@ -825,7 +825,7 @@ class TestHuggingFaceAPIChatGenerator: async def test_live_run_async_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_tokens": 20}, ) diff --git a/test/components/generators/test_hugging_face_api.py b/test/components/generators/test_hugging_face_api.py index 83fd12c48..8cf6d5b8a 100644 --- a/test/components/generators/test_hugging_face_api.py +++ 
b/test/components/generators/test_hugging_face_api.py @@ -298,14 +298,14 @@ class TestHuggingFaceAPIGenerator: def test_run_serverless(self): generator = HuggingFaceAPIGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "mistralai/Mistral-7B-Instruct-v0.3"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_new_tokens": 20}, ) # You must include the instruction tokens in the prompt. HF does not add them automatically. # Without them the model will behave erratically. response = generator.run( - "[INST] What is the capital of France? Be concise only provide the capital, nothing else.[/INST]" + "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n" ) # Assert that the response contains the generated replies @@ -329,12 +329,14 @@ class TestHuggingFaceAPIGenerator: def test_live_run_streaming_check_completion_start_time(self): generator = HuggingFaceAPIGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "HuggingFaceH4/zephyr-7b-beta"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct"}, generation_kwargs={"max_new_tokens": 30}, streaming_callback=streaming_callback_handler, ) - results = generator.run("You are a helpful agent that answers questions. What is the capital of France?") + results = generator.run( + "<|user|>\nWhat is the capital of France? Be concise only provide the capital, nothing else.<|end|>\n<|assistant|>\n" + ) # Assert that the response contains the generated replies assert "replies" in results