chore: reenable HF API Embedders tests + improve HFAPIChatGenerator docstrings (#9589)

* chore: reenable some HF API tests + improve docstrings

* revert deletion
This commit is contained in:
Stefano Fiorucci 2025-07-04 09:39:43 +02:00 committed by GitHub
parent 050c987946
commit 646eedf26a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 15 additions and 23 deletions

View File

@@ -193,13 +193,13 @@ class HuggingFaceAPIChatGenerator:
HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
format for input and output. Use it to generate text with Hugging Face APIs:
- [Free Serverless Inference API](https://huggingface.co/inference-api)
- [Serverless Inference API (Inference Providers)](https://huggingface.co/docs/inference-providers)
- [Paid Inference Endpoints](https://huggingface.co/inference-endpoints)
- [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference)
### Usage examples
#### With the free serverless inference API
#### With the serverless inference API (Inference Providers) - free tier available
```python
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
@@ -215,7 +215,8 @@ class HuggingFaceAPIChatGenerator:
api_type = "serverless_inference_api" # this is equivalent to the above
generator = HuggingFaceAPIChatGenerator(api_type=api_type,
api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
api_params={"model": "microsoft/Phi-3.5-mini-instruct",
"provider": "featherless-ai"},
token=Secret.from_token("<your-api-key>"))
result = generator.run(messages)
@@ -273,13 +274,15 @@ class HuggingFaceAPIChatGenerator:
The type of Hugging Face API to use. Available types:
- `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference).
- `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints).
- `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api).
- `serverless_inference_api`: See
[Serverless Inference API - Inference Providers](https://huggingface.co/docs/inference-providers).
:param api_params:
A dictionary with the following keys:
- `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`.
- `provider`: Provider name. Recommended when `api_type` is `SERVERLESS_INFERENCE_API`.
- `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or
`TEXT_GENERATION_INFERENCE`.
- Other parameters specific to the chosen API type, such as `timeout`, `headers`, `provider` etc.
- Other parameters specific to the chosen API type, such as `timeout`, `headers`, etc.
:param token:
The Hugging Face token to use as HTTP bearer authorization.
Check your HF token in your [account settings](https://huggingface.co/settings/tokens).

View File

@@ -91,7 +91,7 @@ dependencies = [
"transformers[torch, sentencepiece]>=4.52.4,<4.53", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
"huggingface_hub>=0.27.0", # Hugging Face API Generators and Embedders
"sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator
"sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator
"langdetect", # TextLanguageRouter and DocumentLanguageClassifier
"openai-whisper>=20231106", # LocalWhisperTranscriber
"arrow>=1.3.0", # Jinja2TimeExtension

View File

@@ -369,17 +369,13 @@ class TestHuggingFaceAPIDocumentEmbedder:
assert truncate is True
assert normalize is False
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless(self):
docs = [
Document(content="I love cheese", meta={"topic": "Cuisine"}),

View File

@@ -214,17 +214,13 @@ class TestHuggingFaceAPITextEmbedder:
with pytest.raises(ValueError):
embedder.run(text="The food was delicious")
@pytest.mark.flaky(reruns=5, reruns_delay=5)
@pytest.mark.integration
@pytest.mark.slow
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.skipif(
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
def test_live_run_serverless(self):
embedder = HuggingFaceAPITextEmbedder(
api_type=HFEmbeddingAPIType.SERVERLESS_INFERENCE_API,
@@ -238,11 +234,8 @@ class TestHuggingFaceAPITextEmbedder:
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.slow
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set")
@pytest.mark.skip(
reason="HF Inference API is not currently serving these models. "
"See https://github.com/deepset-ai/haystack/issues/9586."
)
async def test_live_run_async_serverless(self):
model_name = "sentence-transformers/all-MiniLM-L6-v2"

View File

@@ -750,7 +750,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless(self):
generator = HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -785,7 +785,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
def test_live_run_serverless_streaming(self):
generator = HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -1021,7 +1021,7 @@ class TestHuggingFaceAPIChatGenerator:
not os.environ.get("HF_API_TOKEN", None),
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
)
@pytest.mark.flaky(reruns=3, reruns_delay=10)
@pytest.mark.flaky(reruns=2, reruns_delay=10)
@pytest.mark.asyncio
async def test_live_run_async_serverless(self):
generator = HuggingFaceAPIChatGenerator(