diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index 7a34a5aa3..ea5dd89cb 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -193,13 +193,13 @@ class HuggingFaceAPIChatGenerator: HuggingFaceAPIChatGenerator uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage) format for input and output. Use it to generate text with Hugging Face APIs: - - [Free Serverless Inference API](https://huggingface.co/inference-api) + - [Serverless Inference API (Inference Providers)](https://huggingface.co/docs/inference-providers) - [Paid Inference Endpoints](https://huggingface.co/inference-endpoints) - [Self-hosted Text Generation Inference](https://github.com/huggingface/text-generation-inference) ### Usage examples - #### With the free serverless inference API + #### With the serverless inference API (Inference Providers) - free tier available ```python from haystack.components.generators.chat import HuggingFaceAPIChatGenerator @@ -215,7 +215,8 @@ class HuggingFaceAPIChatGenerator: api_type = "serverless_inference_api" # this is equivalent to the above generator = HuggingFaceAPIChatGenerator(api_type=api_type, - api_params={"model": "HuggingFaceH4/zephyr-7b-beta"}, + api_params={"model": "microsoft/Phi-3.5-mini-instruct", + "provider": "featherless-ai"}, token=Secret.from_token("")) result = generator.run(messages) @@ -273,13 +274,15 @@ class HuggingFaceAPIChatGenerator: The type of Hugging Face API to use. Available types: - `text_generation_inference`: See [TGI](https://github.com/huggingface/text-generation-inference). - `inference_endpoints`: See [Inference Endpoints](https://huggingface.co/inference-endpoints). - - `serverless_inference_api`: See [Serverless Inference API](https://huggingface.co/inference-api). + - `serverless_inference_api`: See + [Serverless Inference API - Inference Providers](https://huggingface.co/docs/inference-providers). :param api_params: A dictionary with the following keys: - `model`: Hugging Face model ID. Required when `api_type` is `SERVERLESS_INFERENCE_API`. + - `provider`: Provider name. Recommended when `api_type` is `SERVERLESS_INFERENCE_API`. - `url`: URL of the inference endpoint. Required when `api_type` is `INFERENCE_ENDPOINTS` or `TEXT_GENERATION_INFERENCE`. - - Other parameters specific to the chosen API type, such as `timeout`, `headers`, `provider` etc. + - Other parameters specific to the chosen API type, such as `timeout`, `headers`, etc. :param token: The Hugging Face token to use as HTTP bearer authorization. Check your HF token in your [account settings](https://huggingface.co/settings/tokens). diff --git a/pyproject.toml b/pyproject.toml index d33aa93ab..fdf10524d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ dependencies = [ "transformers[torch, sentencepiece]>=4.52.4,<4.53", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... "huggingface_hub>=0.27.0", # Hugging Face API Generators and Embedders - "sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator + "sentence-transformers>=4.1.0", # Sentence Transformers Embedders, Rankers, and SASEvaluator "langdetect", # TextLanguageRouter and DocumentLanguageClassifier "openai-whisper>=20231106", # LocalWhisperTranscriber "arrow>=1.3.0", # Jinja2TimeExtension diff --git a/test/components/embedders/test_hugging_face_api_document_embedder.py b/test/components/embedders/test_hugging_face_api_document_embedder.py index 5dad17699..7311716e8 100644 --- a/test/components/embedders/test_hugging_face_api_document_embedder.py +++ b/test/components/embedders/test_hugging_face_api_document_embedder.py @@ -369,17 +369,13 @@ class TestHuggingFaceAPIDocumentEmbedder: assert truncate is True assert normalize is False - @pytest.mark.flaky(reruns=5, reruns_delay=5) @pytest.mark.integration @pytest.mark.slow @pytest.mark.skipif( not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.skip( - reason="HF Inference API is not currently serving these models. " - "See https://github.com/deepset-ai/haystack/issues/9586." - ) + @pytest.mark.flaky(reruns=2, reruns_delay=10) def test_live_run_serverless(self): docs = [ Document(content="I love cheese", meta={"topic": "Cuisine"}), diff --git a/test/components/embedders/test_hugging_face_api_text_embedder.py b/test/components/embedders/test_hugging_face_api_text_embedder.py index d6c0582e0..80f641c23 100644 --- a/test/components/embedders/test_hugging_face_api_text_embedder.py +++ b/test/components/embedders/test_hugging_face_api_text_embedder.py @@ -214,17 +214,13 @@ class TestHuggingFaceAPITextEmbedder: with pytest.raises(ValueError): embedder.run(text="The food was delicious") - @pytest.mark.flaky(reruns=5, reruns_delay=5) @pytest.mark.integration @pytest.mark.slow + @pytest.mark.flaky(reruns=2, reruns_delay=10) @pytest.mark.skipif( not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.skip( - reason="HF Inference API is not currently serving these models. " - "See https://github.com/deepset-ai/haystack/issues/9586." - ) def test_live_run_serverless(self): embedder = HuggingFaceAPITextEmbedder( api_type=HFEmbeddingAPIType.SERVERLESS_INFERENCE_API, @@ -238,11 +234,8 @@ class TestHuggingFaceAPITextEmbedder: @pytest.mark.integration @pytest.mark.asyncio @pytest.mark.slow + @pytest.mark.flaky(reruns=2, reruns_delay=10) @pytest.mark.skipif(os.environ.get("HF_API_TOKEN", "") == "", reason="HF_API_TOKEN is not set") - @pytest.mark.skip( - reason="HF Inference API is not currently serving these models. " - "See https://github.com/deepset-ai/haystack/issues/9586." - ) async def test_live_run_async_serverless(self): model_name = "sentence-transformers/all-MiniLM-L6-v2" diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index f0db2389c..71e6eeb9b 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -750,7 +750,7 @@ class TestHuggingFaceAPIChatGenerator: not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.flaky(reruns=3, reruns_delay=10) + @pytest.mark.flaky(reruns=2, reruns_delay=10) def test_live_run_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, @@ -785,7 +785,7 @@ class TestHuggingFaceAPIChatGenerator: not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.flaky(reruns=3, reruns_delay=10) + @pytest.mark.flaky(reruns=2, reruns_delay=10) def test_live_run_serverless_streaming(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, @@ -1021,7 +1021,7 @@ class TestHuggingFaceAPIChatGenerator: not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.flaky(reruns=3, reruns_delay=10) + @pytest.mark.flaky(reruns=2, reruns_delay=10) @pytest.mark.asyncio async def test_live_run_async_serverless(self): generator = HuggingFaceAPIChatGenerator(