chore: reenable HF API Embedders tests + improve HFAPIChatGenerator docstrings (#9589)

* chore: reenable some HF API tests + improve docstrings

* revert deletion
This commit is contained in:
Stefano Fiorucci 2025-07-04 09:39:43 +02:00 committed by GitHub
parent 050c987946
commit 646eedf26a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 15 additions and 23 deletions

View File

@@ -193,13 +193,13 @@ class HuggingFaceAPIChatGenerator:
HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
format for input and output. Use it to generate text with Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Serverless Inference API (Inference Providers)](https://huggingface.co/docs/inference-providers)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
- [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)
### Usage examples
#### With the free serverless inference API
#### With the serverless inference API (Inference Providers) - free tier available
```python
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
@@ -215,7 +215,8 @@ class HuggingFaceAPIChatGenerator:
api_type = "serverless_inference_api" # this is equivalent to the above
generator = HuggingFaceAPIChatGenerator(api_type=api_type,
api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
api_params={"model": "microsoft/Phi-3.5-mini-instruct",
"provider": "featherless-ai"},
token=Secret.from_token("<your-api-key>"))
result = generator.run(messages)
@@ -273,13 +274,15 @@ class HuggingFaceAPIChatGenerator:
The type of Hugging Face API to use. Available types:
- `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
- `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
- `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
- `serverless_inference_api`: See
[Serverless Inference API - Inference Providers](https://huggingface.co/docs/inference-providers).
:param api_params:
A dictionary with the following keys:
- `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
- `provider`: Provider name. Recommended when `api_type` is `SERVERLESS_INFERENCE_API`.
- `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
`TEXT_GENERATION_INFERENCE`.
- Other parameters specific to the chosen API type, such as `timeout`, `headers`, `provider` etc.
- Other parameters specific to the chosen API type, such as `timeout`, `headers`, etc.
:param token:
The Hugging Face token to use as HTTP bearer authorization.
Check your HF token in your [account settings](https://huggingface.co/settings/tokens).

View File

@@ -91,7 +91,7 @@ dependencies = [
"transformers[torch, sentencepiece]>=4.52.4,<4.53", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
"huggingface_hub>=0.27.0", # Hugging Face API Generators and Embedders
"sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator
"sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator
"langdetect", # TextLanguageRouter and DocumentLanguageClassifier
"openai-whisper>=20231106", # LocalWhisperTranscriber
"arrow>=1.3.0", # Jinja2TimeExtension

View File

@@ -369,17 +369,13 @@ class TestHuggingFaceAPIDocumentEmbedder:
assert truncate is True
assert normalize is False
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless(self):
docs = [
Document(content="I love cheese", meta={"topic": "Cuisine"}),

View File

@@ -214,17 +214,13 @@ class TestHuggingFaceAPITextEmbedder:
with pytest.raises(ValueError):
embedder.run(text="The food was delicious")
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
def test_live_run_serverless(self):
embedder = HuggingFaceAPITextEmbedder(
api_type=HFEmbeddingAPIType.SERVERLESS_INFERENCE_API,
@@ -238,11 +234,8 @@ class TestHuggingFaceAPITextEmbedder:
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set")
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
async def test_live_run_async_serverless(self):
model_name = "sentence-transformers/all-MiniLM-L6-v2"

View File

@@ -750,7 +750,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless(self):
generator = HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -785,7 +785,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless_streaming(self):
generator = HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -1021,7 +1021,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.asyncio
async def test_live_run_async_serverless(self):
generator = HuggingFaceAPIChatGenerator(