diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py
index 7a34a5aa3..ea5dd89cb 100644
--- a/haystack/components/generators/chat/hugging_face_api.py
+++ b/haystack/components/generators/chat/hugging_face_api.py
@@ -193,13 +193,13 @@ class HuggingFaceAPIChatGenerator:
 
     HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
     format for input and output. Use it to generate text with Hugging Face APIs:
-    - [Free Serverless Inference API](https://huggingface.co/inference-api)
+    - [Serverless Inference API (Inference Providers)](https://huggingface.co/docs/inference-providers)
     - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
     - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)
 
     ### Usage examples
 
-    #### With the free serverless inference API
+    #### With the serverless inference API (Inference Providers) - free tier available
 
     ```python
     from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
@@ -215,7 +215,8 @@ class HuggingFaceAPIChatGenerator:
     api_type = "serverless_inference_api" # this is equivalent to the above
 
     generator = HuggingFaceAPIChatGenerator(api_type=api_type,
-                                            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
+                                            api_params={"model": "microsoft/Phi-3.5-mini-instruct",
+                                                        "provider": "featherless-ai"},
                                             token=Secret.from_token("<your-api-key>"))
 
     result = generator.run(messages)
@@ -273,13 +274,15 @@ class HuggingFaceAPIChatGenerator:
             The type of Hugging Face API to use. Available types:
             - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
             - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
-            - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
+            - `serverless_inference_api`: See
+            [Serverless Inference API - Inference Providers](https://huggingface.co/docs/inference-providers).
         :param api_params:
             A dictionary with the following keys:
             - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
+            - `provider`: Inference provider name. Recommended when `api_type` is `SERVERLESS_INFERENCE_API`.
             - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
             `TEXT_GENERATION_INFERENCE`.
-            - Other parameters specific to the chosen API type, such as `timeout`, `headers`, `provider` etc.
+            - Other parameters specific to the chosen API type, such as `timeout`, `headers`, etc.
         :param token:
             The Hugging Face token to use as HTTP bearer authorization.
             Check your HF token in your [account settings](https://huggingface.co/settings/tokens).
diff --git a/pyproject.toml b/pyproject.toml
index d33aa93ab..fdf10524d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ dependencies = [
 
   "transformers[torch, sentencepiece]>=4.52.4,<4.53", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
   "huggingface_hub>=0.27.0",                          # Hugging Face API Generators and Embedders
-  "sentence-transformers>=4.1.0",              # Sentence Transformers Embedders, Rankers, and SASEvaluator
+  "sentence-transformers>=4.1.0",                     # Sentence Transformers Embedders, Rankers, and SASEvaluator
   "langdetect",                                       # TextLanguageRouter and DocumentLanguageClassifier
   "openai-whisper>=20231106",                         # LocalWhisperTranscriber
   "arrow>=1.3.0",                                     # Jinja2TimeExtension
diff --git a/test/components/embedders/test_hugging_face_api_document_embedder.py b/test/components/embedders/test_hugging_face_api_document_embedder.py
index 5dad17699..7311716e8 100644
--- a/test/components/embedders/test_hugging_face_api_document_embedder.py
+++ b/test/components/embedders/test_hugging_face_api_document_embedder.py
@@ -369,17 +369,13 @@ class TestHuggingFaceAPIDocumentEmbedder:
         assert truncate is True
         assert normalize is False
 
-    @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
     @pytest.mark.slow
     @pytest.mark.skipif(
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.skip(
-        reason="HF Inference API is not currently serving these models. "
-        "See https://github.com/deepset-ai/haystack/issues/9586."
-    )
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     def test_live_run_serverless(self):
         docs = [
             Document(content="I love cheese", meta={"topic": "Cuisine"}),
diff --git a/test/components/embedders/test_hugging_face_api_text_embedder.py b/test/components/embedders/test_hugging_face_api_text_embedder.py
index d6c0582e0..80f641c23 100644
--- a/test/components/embedders/test_hugging_face_api_text_embedder.py
+++ b/test/components/embedders/test_hugging_face_api_text_embedder.py
@@ -214,17 +214,13 @@ class TestHuggingFaceAPITextEmbedder:
             with pytest.raises(ValueError):
                 embedder.run(text="The food was delicious")
 
-    @pytest.mark.flaky(reruns=5, reruns_delay=5)
     @pytest.mark.integration
     @pytest.mark.slow
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     @pytest.mark.skipif(
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.skip(
-        reason="HF Inference API is not currently serving these models. "
-        "See https://github.com/deepset-ai/haystack/issues/9586."
-    )
     def test_live_run_serverless(self):
         embedder = HuggingFaceAPITextEmbedder(
             api_type=HFEmbeddingAPIType.SERVERLESS_INFERENCE_API,
@@ -238,11 +234,8 @@ class TestHuggingFaceAPITextEmbedder:
     @pytest.mark.integration
     @pytest.mark.asyncio
     @pytest.mark.slow
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     @pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set")
-    @pytest.mark.skip(
-        reason="HF Inference API is not currently serving these models. "
-        "See https://github.com/deepset-ai/haystack/issues/9586."
-    )
     async def test_live_run_async_serverless(self):
         model_name = "sentence-transformers/all-MiniLM-L6-v2"
 
diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
index f0db2389c..71e6eeb9b 100644
--- a/test/components/generators/chat/test_hugging_face_api.py
+++ b/test/components/generators/chat/test_hugging_face_api.py
@@ -750,7 +750,7 @@ class TestHuggingFaceAPIChatGenerator:
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     def test_live_run_serverless(self):
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -785,7 +785,7 @@ class TestHuggingFaceAPIChatGenerator:
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     def test_live_run_serverless_streaming(self):
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -1021,7 +1021,7 @@ class TestHuggingFaceAPIChatGenerator:
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    @pytest.mark.flaky(reruns=2, reruns_delay=10)
     @pytest.mark.asyncio
     async def test_live_run_async_serverless(self):
         generator = HuggingFaceAPIChatGenerator(