diff --git a/docs/pydoc/config/generators_api.yml b/docs/pydoc/config/generators_api.yml
index 6058dc54f..62b1d015c 100644
--- a/docs/pydoc/config/generators_api.yml
+++ b/docs/pydoc/config/generators_api.yml
@@ -11,7 +11,6 @@ loaders:
       "chat/hugging_face_local",
       "chat/hugging_face_tgi",
       "chat/openai",
-      "utils",
     ]
   ignore_when_discovered: ["__init__"]
 processors:
diff --git a/haystack/components/generators/chat/azure.py b/haystack/components/generators/chat/azure.py
index 1ccf87d29..0a2f9fa0a 100644
--- a/haystack/components/generators/chat/azure.py
+++ b/haystack/components/generators/chat/azure.py
@@ -15,16 +15,29 @@ logger = logging.getLogger(__name__)
 class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     """
-    Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports gpt-4 and gpt-3.5-turbo
+    Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports the `gpt-4` and `gpt-3.5-turbo`
     family of models accessed through the chat completions API endpoint.

     Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
-    directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
     parameter in the `run` method.

     For more details on OpenAI models deployed on Azure, refer to the Microsoft
     [documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/).

+    Key Features and Compatibility:
+    - Primary Compatibility: Designed to work seamlessly with the OpenAI API Chat Completion endpoint.
+    - Streaming Support: Supports streaming responses from the OpenAI API Chat Completion endpoint.
+    - Customizability: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+
+    Input and Output Format:
+    - ChatMessage Format: This component uses the ChatMessage format for structuring both input and output, ensuring
+      coherent and contextually relevant responses in chat-based text generation scenarios.
+    - Details on the ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
+
+    Usage example:
+    ```python
     from haystack.components.generators.chat import AzureOpenAIChatGenerator
     from haystack.dataclasses import ChatMessage
+    from haystack.utils import Secret
@@ -32,30 +45,23 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):

     messages = [ChatMessage.from_user("What's Natural Language Processing?")]

-    client = AzureOpenAIGenerator(azure_endpoint="<your-azure-endpoint>",
-                                  api_key=Secret.from_token("<your-api-key>"),
-                                  azure_deployment="<your-deployment>")
+    client = AzureOpenAIChatGenerator(
+        azure_endpoint="<your-azure-endpoint>",
+        api_key=Secret.from_token("<your-api-key>"),
+        azure_deployment="<your-deployment>")
     response = client.run(messages)
     print(response)
-
-    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
-    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
-    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
-    ```
-    Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
-    and gpt-4 and gpt-3.5-turbo family of models.
-    - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
-    - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
-
-    Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
-    ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-    ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+    ```
+
+    ```
+    {'replies':
+        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
+         enabling computers to understand, interpret, and generate human language in a way that is meaningful and useful.',
+         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+         meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
+    }
+    ```
     """

     # pylint: disable=super-init-not-called
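The `generation_kwargs` behaviour documented in this docstring applies at both construction and call time. A minimal sketch of that merge semantics (the endpoint and deployment values are placeholders, and reading the key from the `AZURE_OPENAI_API_KEY` environment variable via `Secret.from_env_var` is an assumption about the deployment setup):

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

client = AzureOpenAIChatGenerator(
    azure_endpoint="<your-azure-endpoint>",              # placeholder
    azure_deployment="<your-deployment>",                # placeholder, usually the model name
    api_key=Secret.from_env_var("AZURE_OPENAI_API_KEY"),
    generation_kwargs={"temperature": 0.2},              # defaults applied to every run()
)
messages = [ChatMessage.from_user("What's Natural Language Processing?")]
# kwargs passed to run() are merged over the init-time defaults for this call only
response = client.run(messages, generation_kwargs={"max_tokens": 60})
print(response["replies"][0].content)
```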
@@ -71,11 +77,11 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
         generation_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param azure_endpoint: The endpoint of the deployed model, e.g. `https://example-resource.azure.openai.com/`
+        :param azure_endpoint: The endpoint of the deployed model, e.g. `"https://example-resource.azure.openai.com/"`
         :param api_version: The version of the API to use. Defaults to `2023-05-15`.
         :param azure_deployment: The deployment of the model, usually the model name.
         :param api_key: The API key to use for authentication.
-        :param azure_ad_token: Azure Active Directory token, see https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
+        :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id)
         :param organization: The Organization ID, defaults to `None`. See
         [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
@@ -138,7 +144,9 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize this component to a dictionary.
-        :return: The serialized component as a dictionary.
+
+        :returns:
+            The serialized component as a dictionary.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         return default_to_dict(
@@ -157,8 +165,10 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     def from_dict(cls, data: Dict[str, Any]) -> "AzureOpenAIChatGenerator":
         """
         Deserialize this component from a dictionary.
+
         :param data: The dictionary representation of this component.
-        :return: The deserialized component instance.
+        :returns:
+            The deserialized component instance.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key", "azure_ad_token"])
         init_params = data.get("init_parameters", {})
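The `to_dict`/`from_dict` pair above round-trips the component without leaking the raw key: secrets serialized via `default_to_dict` are stored as environment-variable references and re-resolved by `deserialize_secrets_inplace`. A hedged sketch of that contract (it assumes `AZURE_OPENAI_API_KEY` is set in the environment so the default secret can be resolved again on load; the endpoint values are placeholders):

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator

client = AzureOpenAIChatGenerator(
    azure_endpoint="<your-azure-endpoint>",  # placeholder
    azure_deployment="<your-deployment>",    # placeholder
)
data = client.to_dict()
# the api_key entry in data is an env-var reference, never the token itself
restored = AzureOpenAIChatGenerator.from_dict(data)
assert restored.to_dict() == data
```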
diff --git a/haystack/components/generators/chat/hugging_face_local.py b/haystack/components/generators/chat/hugging_face_local.py
index c31f25822..6464f30e7 100644
--- a/haystack/components/generators/chat/hugging_face_local.py
+++ b/haystack/components/generators/chat/hugging_face_local.py
@@ -27,7 +27,6 @@ PIPELINE_SUPPORTED_TASKS = ["text-generation", "text2text-generation"]
 @component
 class HuggingFaceLocalChatGenerator:
     """
-
     The `HuggingFaceLocalChatGenerator` class is a component designed for generating chat responses using models
     from Hugging Face's model hub. It is tailored for local runtime text generation tasks and provides a convenient
     interface for working with chat-based models, such as `HuggingFaceH4/zephyr-7b-beta` or
     `meta-llama/Llama-2-7b-chat-hf`.
@@ -42,15 +41,22 @@ class HuggingFaceLocalChatGenerator:
     generator.warm_up()
     messages = [ChatMessage.from_user("What's Natural Language Processing? Be brief.")]
     print(generator.run(messages))
+    ```

-    # {'replies': [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial
-    intelligence that deals with the interaction between computers and human language. It enables computers
-    to understand, interpret, and generate human language in a valuable way. NLP involves various techniques
-    such as speech recognition, text analysis, sentiment analysis, and machine translation. The ultimate goal
-    is to make it easier for computers to process and derive meaning from human language, improving communication
-    between humans and machines.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
-    'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})]}
+    ```
+    {'replies':
+        [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial intelligence that deals
+        with the interaction between computers and human language. It enables computers to understand, interpret, and
+        generate human language in a valuable way. NLP involves various techniques such as speech recognition, text
+        analysis, sentiment analysis, and machine translation. The ultimate goal is to make it easier for computers to
+        process and derive meaning from human language, improving communication between humans and machines.',
+        role=<ChatRole.ASSISTANT: 'assistant'>,
+        name=None,
+        meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
+        'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})
+    ]
+    }
     ```
     """
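As the example above hints with `generator.warm_up()`, the model is loaded lazily: `run()` raises a `RuntimeError` until `warm_up()` has built the underlying Transformers pipeline. A minimal sketch of that contract (the model choice is illustrative):

```python
from haystack.components.generators.chat import HuggingFaceLocalChatGenerator
from haystack.dataclasses import ChatMessage

generator = HuggingFaceLocalChatGenerator(model="HuggingFaceH4/zephyr-7b-beta")
generator.warm_up()  # builds the Transformers pipeline; required before the first run()
result = generator.run([ChatMessage.from_user("What's Natural Language Processing? Be brief.")])
print(result["replies"][0].content)
```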
@@ -68,7 +74,7 @@ class HuggingFaceLocalChatGenerator:
     ):
         """
         :param model: The name or path of a Hugging Face model for text generation,
-            for example, mistralai/Mistral-7B-Instruct-v0.2,T TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ, etc.
+            for example, `mistralai/Mistral-7B-Instruct-v0.2`, `TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ`, etc.
             The important aspect of the model is that it should be a chat model and that it supports ChatML messaging
             format.
             If the model is also specified in the `huggingface_pipeline_kwargs`, this parameter will be ignored.
@@ -89,10 +95,10 @@ class HuggingFaceLocalChatGenerator:
             or if you wish to use a custom template instead of the model's default, you can use this parameter to
             set your preferred chat template.
         :param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
-            Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`,...
+            Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`, etc.
             See Hugging Face's documentation for more information:
-            - https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation
-            - https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig
+            - [customize-text-generation](https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation)
+            - [GenerationConfig](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig)
             - The only generation_kwargs we set by default is max_new_tokens, which is set to 512 tokens.
         :param huggingface_pipeline_kwargs: Dictionary containing keyword arguments used to initialize the
             Hugging Face pipeline for text generation.
@@ -100,9 +106,7 @@
             In case of duplication, these kwargs override `model`, `task`, `device`, and `token` init parameters.
             See Hugging Face's [documentation](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.pipeline.task)
             for more information on the available kwargs.
-            In this dictionary, you can also include `model_kwargs` to specify the kwargs
-            for model initialization:
-            https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained
+            In this dictionary, you can also include `model_kwargs` to specify the kwargs for
+            [model initialization](https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained).
         :param stop_words: A list of stop words. If any one of the stop words is generated, the generation is stopped.
             If you provide this parameter, you should not specify the `stopping_criteria` in `generation_kwargs`.
             For some chat models, the output includes both the new text and the original prompt.
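A sketch combining the init parameters documented above. The device map and dtype routed through `huggingface_pipeline_kwargs` and `model_kwargs` are illustrative choices, not defaults, and the stop word is hypothetical:

```python
import torch
from haystack.components.generators.chat import HuggingFaceLocalChatGenerator

generator = HuggingFaceLocalChatGenerator(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    generation_kwargs={"max_new_tokens": 256, "temperature": 0.7},
    stop_words=["Observation:"],  # do not also set stopping_criteria in generation_kwargs
    huggingface_pipeline_kwargs={
        "device_map": "auto",                             # forwarded to transformers.pipeline
        "model_kwargs": {"torch_dtype": torch.bfloat16},  # forwarded to from_pretrained
    },
)
```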
@@ -169,12 +173,18 @@ class HuggingFaceLocalChatGenerator:
         return {"model": f"[object of type {type(self.huggingface_pipeline_kwargs['model'])}]"}

     def warm_up(self):
+        """
+        Initializes the component.
+        """
         if self.pipeline is None:
             self.pipeline = pipeline(**self.huggingface_pipeline_kwargs)

     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         serialization_dict = default_to_dict(
@@ -194,7 +204,12 @@ class HuggingFaceLocalChatGenerator:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceLocalChatGenerator":
         """
-        Deserialize this component from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         torch_and_transformers_import.check()  # leave this, cls method
         deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
@@ -214,7 +229,8 @@ class HuggingFaceLocalChatGenerator:
         :param messages: A list of ChatMessage instances representing the input messages.
         :param generation_kwargs: Additional keyword arguments for text generation.
-        :return: A list containing the generated responses as ChatMessage instances.
+        :returns:
+            A list containing the generated responses as ChatMessage instances.
         """
         if self.pipeline is None:
             raise RuntimeError("The generation model has not been loaded. Please call warm_up() before running.")
@@ -287,7 +303,7 @@ class HuggingFaceLocalChatGenerator:
         :param tokenizer: The tokenizer used for generation.
         :param prompt: The prompt used for generation.
         :param generation_kwargs: The generation parameters.
-        :return: A ChatMessage instance.
+        :returns: A ChatMessage instance.
         """
         completion_tokens = len(tokenizer.encode(text, add_special_tokens=False))
         prompt_token_count = len(tokenizer.encode(prompt, add_special_tokens=False))
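The token bookkeeping visible in `create_message` above can be reproduced in isolation: usage statistics come from re-encoding the prompt and the generated text with the model's tokenizer rather than from the backend. A small sketch (the model ID and strings are illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
prompt = "What's Natural Language Processing? Be brief."
text = "NLP is the field of AI concerned with understanding and generating human language."

# mirror the counting shown in the hunk above
completion_tokens = len(tokenizer.encode(text, add_special_tokens=False))
prompt_tokens = len(tokenizer.encode(prompt, add_special_tokens=False))
usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,
}
```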
diff --git a/haystack/components/generators/chat/hugging_face_tgi.py b/haystack/components/generators/chat/hugging_face_tgi.py
index ce03f249e..bbbd4c5ff 100644
--- a/haystack/components/generators/chat/hugging_face_tgi.py
+++ b/haystack/components/generators/chat/hugging_face_tgi.py
@@ -24,7 +24,28 @@ class HuggingFaceTGIChatGenerator:
     inference chat-based models deployed on the Text Generation Inference (TGI) backend.

     You can use this component for chat LLMs hosted on Hugging Face inference endpoints, or the rate-limited
-    Inference API tier:
+    Inference API tier.
+
+    Key Features and Compatibility:
+    - Primary Compatibility: designed to work seamlessly with any chat-based model deployed using the TGI
+      framework. For more information on TGI, visit [text-generation-inference](https://github.com/huggingface/text-generation-inference).
+    - Hugging Face Inference Endpoints: supports inference of TGI chat LLMs deployed on Hugging Face
+      inference endpoints. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints).
+    - Inference API Support: supports inference of TGI chat LLMs hosted on the rate-limited Inference
+      API tier. Learn more about the Inference API at [inference-api](https://huggingface.co/inference-api).
+      Discover available chat models using the command
+      `wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat`
+      and simply use the model ID as the `model` parameter for this component. You'll also need to provide a valid
+      Hugging Face API token as the `token` parameter.
+    - Custom TGI Endpoints: supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
+      deploy their own TGI endpoint using the TGI framework. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints).
+
+    Input and Output Format:
+    - ChatMessage Format: this component uses the ChatMessage format to structure both input and output,
+      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+      ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
+
     ```python
     from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
@@ -58,29 +79,6 @@ class HuggingFaceTGIChatGenerator:
     response = client.run(messages, generation_kwargs={"max_new_tokens": 120})
     print(response)
     ```
-
-    Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with any chat-based model deployed using the TGI
-    framework. For more information on TGI, visit https://github.com/huggingface/text-generation-inference.
-    - **Hugging Face Inference Endpoints**: Supports inference of TGI chat LLMs deployed on Hugging Face
-    inference endpoints. For more details, refer to https://huggingface.co/inference-endpoints.
-    - **Inference API Support**: Supports inference of TGI chat LLMs hosted on the rate-limited Inference
-    API tier. Learn more about the Inference API at https://huggingface.co/inference-api.
-    Discover available chat models using the following command:
-    ```
-    wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat
-    ```
-    and simply use the model ID as the model parameter for this component. You'll also need to provide a valid
-    Hugging Face API token as the token parameter.
-    - **Custom TGI Endpoints**: Supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
-    deploy their own TGI endpoint using the TGI framework. For more details, refer
-    to https://huggingface.co/inference-endpoints.
-
-    Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format to structure both input and output,
-    ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-    ChatMessage format can be found at https://github.com/openai/openai-python/blob/main/chatml.md.
-
     """

     def __init__(
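The Inference API path described in the TGI docstring bullets boils down to a model ID plus a token. A hedged sketch (the model ID is illustrative, and `HF_API_TOKEN` as the backing environment variable is an assumption about the local setup):

```python
from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

client = HuggingFaceTGIChatGenerator(
    model="HuggingFaceH4/zephyr-7b-beta",          # a model ID served by the Inference API
    token=Secret.from_env_var("HF_API_TOKEN"),     # assumed env var holding a valid HF token
)
client.warm_up()  # fetches the tokenizer used for prompt-token accounting
messages = [ChatMessage.from_user("What's Natural Language Processing?")]
response = client.run(messages, generation_kwargs={"max_new_tokens": 120})
print(response["replies"][0].content)
```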
diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
index 9b3ad6a0a..d5f05d50e 100644
--- a/haystack/components/generators/chat/openai.py
+++ b/haystack/components/generators/chat/openai.py
@@ -19,11 +19,11 @@ logger = logging.getLogger(__name__)
 @component
 class OpenAIChatGenerator:
     """
-    Enables text generation using OpenAI's large language models (LLMs). It supports gpt-4 and gpt-3.5-turbo
+    Enables text generation using OpenAI's large language models (LLMs). It supports the `gpt-4` and `gpt-3.5-turbo`
     family of models accessed through the chat completions API endpoint.

     Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
-    directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
     parameter in the `run` method.

     For more details on the parameters supported by the OpenAI API, refer to the OpenAI
     [documentation](https://platform.openai.com/docs/api-reference/chat/create).
@@ -38,25 +38,29 @@ class OpenAIChatGenerator:
     client = OpenAIChatGenerator()
     response = client.run(messages)
     print(response)
-
-    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
-    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
-    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
-
+    ```
+    Output:
+    ```
+    {'replies':
+        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
+         that focuses on enabling computers to understand, interpret, and generate human language in
+         a way that is meaningful and useful.',
+         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+         meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})
+        ]
+    }
     ```

     Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
-    and gpt-4 and gpt-3.5-turbo family of models.
-    - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
-    - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+    - Primary Compatibility: designed to work seamlessly with the OpenAI API Chat Completion endpoint and the
+      `gpt-4` and `gpt-3.5-turbo` family of models.
+    - Streaming Support: supports streaming responses from the OpenAI API Chat Completion endpoint.
+    - Customizability: supports all parameters supported by the OpenAI API Chat Completion endpoint.

     Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
-    ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-    ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+    - ChatMessage Format: this component uses the ChatMessage format for structuring both input and output,
+      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+      ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
     """

     def __init__(
@@ -116,7 +120,9 @@ class OpenAIChatGenerator:
     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize this component to a dictionary.
-        :return: The serialized component as a dictionary.
+
+        :returns:
+            The serialized component as a dictionary.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         return default_to_dict(
@@ -133,8 +139,10 @@ class OpenAIChatGenerator:
     def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChatGenerator":
         """
         Deserialize this component from a dictionary.
+
         :param data: The dictionary representation of this component.
-        :return: The deserialized component instance.
+        :returns:
+            The deserialized component instance.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         init_params = data.get("init_parameters", {})
@@ -150,10 +158,12 @@ class OpenAIChatGenerator:
         :param messages: A list of ChatMessage instances representing the input messages.
         :param generation_kwargs: Additional keyword arguments for text generation. These parameters will
-        potentially override the parameters passed in the __init__ method.
-        For more details on the parameters supported by the OpenAI API, refer to the
-        OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
-        :return: A list containing the generated responses as ChatMessage instances.
+            potentially override the parameters passed in the `__init__` method.
+            For more details on the parameters supported by the OpenAI API, refer to the
+            OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
+
+        :returns:
+            A list containing the generated responses as ChatMessage instances.
         """
         # update generation kwargs by merging with the generation kwargs passed to the run method
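Finally, the streaming support called out in the `OpenAIChatGenerator` docstring is driven entirely by `streaming_callback`. A sketch using `print_streaming_chunk`, which is assumed here to live in `haystack.components.generators.utils` (the module removed from the pydoc config at the top of this diff); the API key is read from the `OPENAI_API_KEY` environment variable by default:

```python
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk  # assumed location
from haystack.dataclasses import ChatMessage

client = OpenAIChatGenerator(streaming_callback=print_streaming_chunk)
# chunks are printed as they arrive; the complete reply is still returned at the end
response = client.run([ChatMessage.from_user("What's Natural Language Processing?")])
```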