diff --git a/docs/pydoc/config/generators_api.yml b/docs/pydoc/config/generators_api.yml
index 6058dc54f..62b1d015c 100644
--- a/docs/pydoc/config/generators_api.yml
+++ b/docs/pydoc/config/generators_api.yml
@@ -11,7 +11,6 @@ loaders:
"chat/hugging_face_local",
"chat/hugging_face_tgi",
"chat/openai",
- "utils",
]
ignore_when_discovered: ["__init__"]
processors:
diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py
index 1ccf87d29..0a2f9fa0a 100644
--- a/haystack/components/generators/chat/azure.py
+++ b/haystack/components/generators/chat/azure.py
@@ -15,16 +15,29 @@ logger = logging.getLogger(__name__)
class AzureOpenAIChatGenerator(OpenAIChatGenerator):
"""
- Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports gpt-4 and gpt-3.5-turbo
+ Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports `gpt-4` and `gpt-3.5-turbo`
family of models accessed through the chat completions API endpoint.
Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
- directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+ directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
parameter in `run` method.
For more details on OpenAI models deployed on Azure, refer to the Microsoft
[documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/).
+ Key Features and Compatibility:
+ - Primary Compatibility: Designed to work seamlessly with the OpenAI API Chat Completion endpoint.
+ - Streaming Support: Supports streaming responses from the OpenAI API Chat Completion endpoint.
+ - Customizability: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+
+ Input and Output Format:
+ - ChatMessage Format: This component uses the ChatMessage format for structuring both input and output, ensuring
+ coherent and contextually relevant responses in chat-based text generation scenarios.
+ - Details on the ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
+
+ Usage example:
+
```python
-    from haystack.components.generators.chat import AzureOpenAIGenerator
+    from haystack.components.generators.chat import AzureOpenAIChatGenerator
    from haystack.dataclasses import ChatMessage
+    from haystack.utils import Secret
@@ -32,30 +45,23 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
messages = [ChatMessage.from_user("What's Natural Language Processing?")]
- client = AzureOpenAIGenerator(azure_endpoint="",
- api_key=Secret.from_token(""),
- azure_deployment="")
+    client = AzureOpenAIChatGenerator(
+ azure_endpoint="",
+ api_key=Secret.from_token(""),
+ azure_deployment="")
response = client.run(messages)
print(response)
-
- >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
- >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
- >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
- >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
-
```
- Key Features and Compatibility:
- - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
- and gpt-4 and gpt-3.5-turbo family of models.
- - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
- - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
-
- Input and Output Format:
- - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
- ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
- ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+    Output:
+    ```
+    {'replies':
+        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
+        enabling computers to understand, interpret, and generate human language in a way that is meaningful and useful.',
+        role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+        meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+        'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
+    }
+    ```
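+
+    If you need different settings for a single call, you can override generation parameters at run time.
+    A minimal sketch (reusing `client` and `messages` from the example above; `max_tokens` and `temperature`
+    are standard OpenAI Chat Completion parameters passed through `generation_kwargs`):
+
+    ```python
+    response = client.run(messages, generation_kwargs={"max_tokens": 100, "temperature": 0.2})
+    print(response["replies"][0].content)
+    ```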
"""
# pylint: disable=super-init-not-called
@@ -71,11 +77,11 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
generation_kwargs: Optional[Dict[str, Any]] = None,
):
"""
- :param azure_endpoint: The endpoint of the deployed model, e.g. `https://example-resource.azure.openai.com/`
+ :param azure_endpoint: The endpoint of the deployed model, e.g. `"https://example-resource.azure.openai.com/"`
:param api_version: The version of the API to use. Defaults to 2023-05-15
:param azure_deployment: The deployment of the model, usually the model name.
:param api_key: The API key to use for authentication.
- :param azure_ad_token: Azure Active Directory token, see https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
+ :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id)
:param organization: The Organization ID, defaults to `None`. See
[production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
:param streaming_callback: A callback function that is called when a new token is received from the stream.
@@ -138,7 +144,9 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
- :return: The serialized component as a dictionary.
+
+ :returns:
+ The serialized component as a dictionary.
"""
callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
return default_to_dict(
@@ -157,8 +165,10 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
def from_dict(cls, data: Dict[str, Any]) -> "AzureOpenAIChatGenerator":
"""
Deserialize this component from a dictionary.
+
:param data: The dictionary representation of this component.
- :return: The deserialized component instance.
+ :returns:
+ The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key", "azure_ad_token"])
init_params = data.get("init_parameters", {})
diff --git a/haystack/components/generators/chat/hugging_face_local.py b/haystack/components/generators/chat/hugging_face_local.py
index c31f25822..6464f30e7 100644
--- a/haystack/components/generators/chat/hugging_face_local.py
+++ b/haystack/components/generators/chat/hugging_face_local.py
@@ -27,7 +27,6 @@ PIPELINE_SUPPORTED_TASKS = ["text-generation", "text2text-generation"]
@component
class HuggingFaceLocalChatGenerator:
"""
-
The `HuggingFaceLocalChatGenerator` class is a component designed for generating chat responses using models from
Hugging Face's model hub. It is tailored for local runtime text generation tasks and provides a convenient interface
for working with chat-based models, such as `HuggingFaceH4/zephyr-7b-beta` or `meta-llama/Llama-2-7b-chat-hf`
@@ -42,15 +41,22 @@ class HuggingFaceLocalChatGenerator:
generator.warm_up()
messages = [ChatMessage.from_user("What's Natural Language Processing? Be brief.")]
print(generator.run(messages))
+ ```
- # {'replies': [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial
- intelligence that deals with the interaction between computers and human language. It enables computers
- to understand, interpret, and generate human language in a valuable way. NLP involves various techniques
- such as speech recognition, text analysis, sentiment analysis, and machine translation. The ultimate goal
- is to make it easier for computers to process and derive meaning from human language, improving communication
-    between humans and machines.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
- meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
- 'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})]}
+ ```
+ {'replies':
+ [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial intelligence that deals
+ with the interaction between computers and human language. It enables computers to understand, interpret, and
+ generate human language in a valuable way. NLP involves various techniques such as speech recognition, text
+ analysis, sentiment analysis, and machine translation. The ultimate goal is to make it easier for computers to
+ process and derive meaning from human language, improving communication between humans and machines.',
+    role=<ChatRole.ASSISTANT: 'assistant'>,
+    name=None,
+    meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
+ 'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})
+ ]
+ }
```
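+
+    You can also tune generation at construction time. A short sketch (parameter names follow the
+    `__init__` documentation below; the model name is only an example):
+
+    ```python
+    generator = HuggingFaceLocalChatGenerator(
+        model="HuggingFaceH4/zephyr-7b-beta",
+        generation_kwargs={"max_new_tokens": 350, "temperature": 0.9},
+        stop_words=["Bye"])
+    generator.warm_up()
+    ```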
"""
@@ -68,7 +74,7 @@ class HuggingFaceLocalChatGenerator:
):
"""
:param model: The name or path of a Hugging Face model for text generation,
- for example, mistralai/Mistral-7B-Instruct-v0.2,T TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ, etc.
+ for example, `mistralai/Mistral-7B-Instruct-v0.2`, `TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ`, etc.
The important aspect of the model is that it should be a chat model and that it supports ChatML messaging
format.
If the model is also specified in the `huggingface_pipeline_kwargs`, this parameter will be ignored.
@@ -89,10 +95,10 @@ class HuggingFaceLocalChatGenerator:
or if you wish to use a custom template instead of the model's default, you can use this parameter to
set your preferred chat template.
:param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
- Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`,...
+ Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`, etc.
See Hugging Face's documentation for more information:
- - https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation
- - https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig
+          - [customize-text-generation](https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation)
+          - [GenerationConfig](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig)
- The only generation_kwargs we set by default is max_new_tokens, which is set to 512 tokens.
:param huggingface_pipeline_kwargs: Dictionary containing keyword arguments used to initialize the
Hugging Face pipeline for text generation.
@@ -100,9 +106,7 @@ class HuggingFaceLocalChatGenerator:
In case of duplication, these kwargs override `model`, `task`, `device`, and `token` init parameters.
See Hugging Face's [documentation](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.pipeline.task)
for more information on the available kwargs.
- In this dictionary, you can also include `model_kwargs` to specify the kwargs
- for model initialization:
- https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained
+            In this dictionary, you can also include `model_kwargs` to specify the kwargs for [model initialization](https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained).
:param stop_words: A list of stop words. If any one of the stop words is generated, the generation is stopped.
If you provide this parameter, you should not specify the `stopping_criteria` in `generation_kwargs`.
For some chat models, the output includes both the new text and the original prompt.
@@ -169,12 +173,18 @@ class HuggingFaceLocalChatGenerator:
return {"model": f"[object of type {type(self.huggingface_pipeline_kwargs['model'])}]"}
def warm_up(self):
+ """
+ Initializes the component.
+ """
if self.pipeline is None:
self.pipeline = pipeline(**self.huggingface_pipeline_kwargs)
def to_dict(self) -> Dict[str, Any]:
"""
- Serialize this component to a dictionary.
+ Serializes the component to a dictionary.
+
+ :returns:
+ Dictionary with serialized data.
"""
callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
serialization_dict = default_to_dict(
@@ -194,7 +204,12 @@ class HuggingFaceLocalChatGenerator:
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceLocalChatGenerator":
"""
- Deserialize this component from a dictionary.
+ Deserializes the component from a dictionary.
+
+ :param data:
+ The dictionary to deserialize from.
+ :returns:
+ The deserialized component.
"""
torch_and_transformers_import.check() # leave this, cls method
deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
@@ -214,7 +229,8 @@ class HuggingFaceLocalChatGenerator:
:param messages: A list of ChatMessage instances representing the input messages.
:param generation_kwargs: Additional keyword arguments for text generation.
- :return: A list containing the generated responses as ChatMessage instances.
+ :returns:
+ A list containing the generated responses as ChatMessage instances.
"""
if self.pipeline is None:
raise RuntimeError("The generation model has not been loaded. Please call warm_up() before running.")
@@ -287,7 +303,7 @@ class HuggingFaceLocalChatGenerator:
:param tokenizer: The tokenizer used for generation.
:param prompt: The prompt used for generation.
:param generation_kwargs: The generation parameters.
- :return: A ChatMessage instance.
+ :returns: A ChatMessage instance.
"""
completion_tokens = len(tokenizer.encode(text, add_special_tokens=False))
prompt_token_count = len(tokenizer.encode(prompt, add_special_tokens=False))
diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py
index ce03f249e..bbbd4c5ff 100644
--- a/haystack/components/generators/chat/hugging_face_tgi.py
+++ b/haystack/components/generators/chat/hugging_face_tgi.py
@@ -24,7 +24,28 @@ class HuggingFaceTGIChatGenerator:
inference chat-based models deployed on the Text Generation Inference (TGI) backend.
You can use this component for chat LLMs hosted on Hugging Face inference endpoints, the rate-limited
- Inference API tier:
+ Inference API tier.
+
+ Key Features and Compatibility:
+    - Primary Compatibility: designed to work seamlessly with any chat-based model deployed using the TGI
+      framework. For more information on TGI, visit [text-generation-inference](https://github.com/huggingface/text-generation-inference).
+    - Hugging Face Inference Endpoints: supports inference of TGI chat LLMs deployed on Hugging Face
+      inference endpoints. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints).
+    - Inference API Support: supports inference of TGI chat LLMs hosted on the rate-limited Inference
+      API tier. Learn more about the Inference API at [inference-api](https://huggingface.co/inference-api).
+      Discover available chat models using the following command: `wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat`
+      and simply use the model ID as the `model` parameter for this component. You'll also need to provide a valid
+      Hugging Face API token as the `token` parameter.
+    - Custom TGI Endpoints: supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
+      deploy their own TGI endpoint using the TGI framework. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints).
+
+ Input and Output Format:
+ - ChatMessage Format: This component uses the ChatMessage format to structure both input and output,
+ ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+ ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
+
```python
from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
@@ -58,29 +79,6 @@ class HuggingFaceTGIChatGenerator:
response = client.run(messages, generation_kwargs={"max_new_tokens": 120})
print(response)
```
-
- Key Features and Compatibility:
- - **Primary Compatibility**: Designed to work seamlessly with any chat-based model deployed using the TGI
- framework. For more information on TGI, visit https://github.com/huggingface/text-generation-inference.
- - **Hugging Face Inference Endpoints**: Supports inference of TGI chat LLMs deployed on Hugging Face
- inference endpoints. For more details, refer to https://huggingface.co/inference-endpoints.
- - **Inference API Support**: Supports inference of TGI chat LLMs hosted on the rate-limited Inference
- API tier. Learn more about the Inference API at https://huggingface.co/inference-api.
- Discover available chat models using the following command:
- ```
- wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat
- ```
- and simply use the model ID as the model parameter for this component. You'll also need to provide a valid
- Hugging Face API token as the token parameter.
- - **Custom TGI Endpoints**: Supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
- deploy their own TGI endpoint using the TGI framework. For more details, refer
- to https://huggingface.co/inference-endpoints.
-
- Input and Output Format:
- - **ChatMessage Format**: This component uses the ChatMessage format to structure both input and output,
- ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
- ChatMessage format can be found at https://github.com/openai/openai-python/blob/main/chatml.md.
-
"""
def __init__(
diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 9b3ad6a0a..d5f05d50e 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -19,11 +19,11 @@ logger = logging.getLogger(__name__)
@component
class OpenAIChatGenerator:
"""
- Enables text generation using OpenAI's large language models (LLMs). It supports gpt-4 and gpt-3.5-turbo
+ Enables text generation using OpenAI's large language models (LLMs). It supports `gpt-4` and `gpt-3.5-turbo`
family of models accessed through the chat completions API endpoint.
Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
- directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+ directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
parameter in `run` method.
For more details on the parameters supported by the OpenAI API, refer to the OpenAI
@@ -38,25 +38,29 @@ class OpenAIChatGenerator:
client = OpenAIChatGenerator()
response = client.run(messages)
print(response)
-
- >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
- >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
- >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
- >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
-
+ ```
+ Output:
+ ```
+ {'replies':
+ [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
+ that focuses on enabling computers to understand, interpret, and generate human language in
+ a way that is meaningful and useful.',
+    role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+ meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+ 'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})
+ ]
+ }
```
Key Features and Compatibility:
- - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
- and gpt-4 and gpt-3.5-turbo family of models.
- - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
- - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+    - Primary Compatibility: designed to work seamlessly with the OpenAI API Chat Completion endpoint
+      and the `gpt-4` and `gpt-3.5-turbo` family of models.
+ - Streaming Support: supports streaming responses from the OpenAI API Chat Completion endpoint.
+ - Customizability: supports all parameters supported by the OpenAI API Chat Completion endpoint.
Input and Output Format:
- - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
- ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
- ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+ - ChatMessage Format: this component uses the ChatMessage format for structuring both input and output,
+ ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+      ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
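+
+    A minimal streaming sketch (assuming the callback receives `StreamingChunk` objects whose `content`
+    attribute carries the newly generated text):
+
+    ```python
+    client = OpenAIChatGenerator(
+        streaming_callback=lambda chunk: print(chunk.content, end="", flush=True))
+    response = client.run([ChatMessage.from_user("Summarize NLP in one sentence.")])
+    ```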
"""
def __init__(
@@ -116,7 +120,9 @@ class OpenAIChatGenerator:
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.
- :return: The serialized component as a dictionary.
+
+ :returns:
+ The serialized component as a dictionary.
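+
+        A hedged round-trip sketch (assumes the default `api_key`, read from the `OPENAI_API_KEY`
+        environment variable, so that secret resolution works after deserialization):
+
+        ```python
+        client = OpenAIChatGenerator()
+        data = client.to_dict()
+        restored = OpenAIChatGenerator.from_dict(data)
+        ```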
"""
callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
return default_to_dict(
@@ -133,8 +139,10 @@ class OpenAIChatGenerator:
def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChatGenerator":
"""
Deserialize this component from a dictionary.
+
:param data: The dictionary representation of this component.
- :return: The deserialized component instance.
+ :returns:
+ The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
init_params = data.get("init_parameters", {})
@@ -150,10 +158,12 @@ class OpenAIChatGenerator:
:param messages: A list of ChatMessage instances representing the input messages.
:param generation_kwargs: Additional keyword arguments for text generation. These parameters will
- potentially override the parameters passed in the __init__ method.
- For more details on the parameters supported by the OpenAI API, refer to the
- OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
- :return: A list containing the generated responses as ChatMessage instances.
+ potentially override the parameters passed in the `__init__` method.
+ For more details on the parameters supported by the OpenAI API, refer to the
+ OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
+
+ :returns:
+ A list containing the generated responses as ChatMessage instances.
"""
# update generation kwargs by merging with the generation kwargs passed to the run method