docs: docstrings - generators.chat (#7239)

* fixing docstrings

* Update haystack/components/generators/chat/azure.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/generators/chat/openai.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/generators/chat/hugging_face_tgi.py

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* fixing docstrings

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
David S. Batista 2024-02-28 17:33:47 +01:00 committed by GitHub
parent f22d49944d
commit 0ddb48b70f
5 changed files with 126 additions and 93 deletions

docs/pydoc/config/generators_api.yml

@@ -11,7 +11,6 @@ loaders:
         "chat/hugging_face_local",
         "chat/hugging_face_tgi",
         "chat/openai",
-        "utils",
       ]
     ignore_when_discovered: ["__init__"]
 processors:

haystack/components/generators/chat/azure.py

@@ -15,16 +15,29 @@ logger = logging.getLogger(__name__)
 class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     """
-    Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports gpt-4 and gpt-3.5-turbo
+    Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports `gpt-4` and `gpt-3.5-turbo`
     family of models accessed through the chat completions API endpoint.
     Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
-    directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
     parameter in `run` method.

     For more details on OpenAI models deployed on Azure, refer to the Microsoft
     [documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/).

+    Key Features and Compatibility:
+    - Primary Compatibility: Designed to work seamlessly with the OpenAI API Chat Completion endpoint.
+    - Streaming Support: Supports streaming responses from the OpenAI API Chat Completion endpoint.
+    - Customizability: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+
+    Input and Output Format:
+    - ChatMessage Format: This component uses the ChatMessage format for structuring both input and output, ensuring
+      coherent and contextually relevant responses in chat-based text generation scenarios.
+    - Details on the ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
+
     Usage example:
     ```python
     from haystack.components.generators.chat import AzureOpenAIGenerator
     from haystack.dataclasses import ChatMessage
@@ -32,30 +45,23 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     messages = [ChatMessage.from_user("What's Natural Language Processing?")]

-    client = AzureOpenAIGenerator(azure_endpoint="<Your Azure endpoint e.g. `https://your-company.azure.openai.com/>",
-                                  api_key=Secret.from_token("<your-api-key>"),
-                                  azure_deployment="<this a model name, e.g. gpt-35-turbo>")
+    client = AzureOpenAIGenerator(
+        azure_endpoint="<Your Azure endpoint e.g. `https://your-company.azure.openai.com/>",
+        api_key=Secret.from_token("<your-api-key>"),
+        azure_deployment="<this a model name, e.g. gpt-35-turbo>")
     response = client.run(messages)
     print(response)
-
-    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
-    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
-    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
     ```
-
-    Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
-      and gpt-4 and gpt-3.5-turbo family of models.
-    - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
-    - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
-
-    Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
-      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-      ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+
+    ```
+    {'replies':
+        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence that focuses on
+         enabling computers to understand, interpret, and generate human language in a way that is meaningful and useful.',
+         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+         meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]
+    }
+    ```
     """

     # pylint: disable=super-init-not-called
@@ -71,11 +77,11 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
         generation_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param azure_endpoint: The endpoint of the deployed model, e.g. `https://example-resource.azure.openai.com/`
+        :param azure_endpoint: The endpoint of the deployed model, e.g. `"https://example-resource.azure.openai.com/"`
         :param api_version: The version of the API to use. Defaults to 2023-05-15
         :param azure_deployment: The deployment of the model, usually the model name.
         :param api_key: The API key to use for authentication.
-        :param azure_ad_token: Azure Active Directory token, see https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
+        :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id)
         :param organization: The Organization ID, defaults to `None`. See
             [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
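
For reference, here is how the parameters above compose; a minimal sketch with a placeholder endpoint and deployment name, assuming the `AZURE_OPENAI_API_KEY` environment variable is set (values are illustrative, not defaults):

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

client = AzureOpenAIChatGenerator(
    azure_endpoint="https://example-resource.azure.openai.com/",  # placeholder endpoint
    api_key=Secret.from_env_var("AZURE_OPENAI_API_KEY"),  # avoids hard-coding the key
    azure_deployment="gpt-35-turbo",  # deployment name, usually the model name
    generation_kwargs={"temperature": 0.2, "max_tokens": 256},  # forwarded to the Chat Completion call
)
response = client.run([ChatMessage.from_user("What's Natural Language Processing?")])
print(response["replies"][0].content)
```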
@@ -138,7 +144,9 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize this component to a dictionary.
-        :return: The serialized component as a dictionary.
+
+        :returns:
+            The serialized component as a dictionary.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         return default_to_dict(
@@ -157,8 +165,10 @@ class AzureOpenAIChatGenerator(OpenAIChatGenerator):
     def from_dict(cls, data: Dict[str, Any]) -> "AzureOpenAIChatGenerator":
         """
         Deserialize this component from a dictionary.
+
         :param data: The dictionary representation of this component.
-        :return: The deserialized component instance.
+        :returns:
+            The deserialized component instance.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key", "azure_ad_token"])
         init_params = data.get("init_parameters", {})
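
A hedged sketch of the serialization contract these two methods document: `to_dict()` stores the init parameters as a plain dictionary (secrets are kept as environment-variable references, not resolved values), and `from_dict()` rebuilds an equivalent instance. Assumes `AZURE_OPENAI_API_KEY` is set in the environment:

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator

generator = AzureOpenAIChatGenerator(
    azure_endpoint="https://example-resource.azure.openai.com/",  # placeholder
    azure_deployment="gpt-35-turbo",
)
data = generator.to_dict()  # plain dict: {'type': ..., 'init_parameters': {...}}
restored = AzureOpenAIChatGenerator.from_dict(data)  # equivalent component
assert restored.to_dict() == data  # the round trip should be lossless
```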

haystack/components/generators/chat/hugging_face_local.py

@@ -27,7 +27,6 @@ PIPELINE_SUPPORTED_TASKS = ["text-generation", "text2text-generation"]
 @component
 class HuggingFaceLocalChatGenerator:
     """
-
     The `HuggingFaceLocalChatGenerator` class is a component designed for generating chat responses using models from
     Hugging Face's model hub. It is tailored for local runtime text generation tasks and provides a convenient interface
     for working with chat-based models, such as `HuggingFaceH4/zephyr-7b-beta` or `meta-llama/Llama-2-7b-chat-hf`
@@ -42,15 +41,22 @@ class HuggingFaceLocalChatGenerator:
     generator.warm_up()
     messages = [ChatMessage.from_user("What's Natural Language Processing? Be brief.")]
     print(generator.run(messages))
+    ```
-    # {'replies': [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial
-    intelligence that deals with the interaction between computers and human language. It enables computers
-    to understand, interpret, and generate human language in a valuable way. NLP involves various techniques
-    such as speech recognition, text analysis, sentiment analysis, and machine translation. The ultimate goal
-    is to make it easier for computers to process and derive meaning from human language, improving communication
-    between humans and machines.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
-    'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})]}
     ```
+    {'replies':
+        [ChatMessage(content=' Natural Language Processing (NLP) is a subfield of artificial intelligence that deals
+         with the interaction between computers and human language. It enables computers to understand, interpret, and
+         generate human language in a valuable way. NLP involves various techniques such as speech recognition, text
+         analysis, sentiment analysis, and machine translation. The ultimate goal is to make it easier for computers to
+         process and derive meaning from human language, improving communication between humans and machines.',
+         role=<ChatRole.ASSISTANT: 'assistant'>,
+         name=None,
+         meta={'finish_reason': 'stop', 'index': 0, 'model': 'mistralai/Mistral-7B-Instruct-v0.2',
+         'usage': {'completion_tokens': 90, 'prompt_tokens': 19, 'total_tokens': 109}})
+        ]
+    }
+    ```
     """
@@ -68,7 +74,7 @@ class HuggingFaceLocalChatGenerator:
     ):
         """
         :param model: The name or path of a Hugging Face model for text generation,
-            for example, mistralai/Mistral-7B-Instruct-v0.2,T TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ, etc.
+            for example, `mistralai/Mistral-7B-Instruct-v0.2`, `TheBloke/OpenHermes-2.5-Mistral-7B-16k-AWQ`, etc.
             The important aspect of the model is that it should be a chat model and that it supports ChatML messaging
             format.
             If the model is also specified in the `huggingface_pipeline_kwargs`, this parameter will be ignored.
@@ -89,10 +95,10 @@ class HuggingFaceLocalChatGenerator:
             or if you wish to use a custom template instead of the model's default, you can use this parameter to
             set your preferred chat template.
         :param generation_kwargs: A dictionary containing keyword arguments to customize text generation.
-            Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`,...
+            Some examples: `max_length`, `max_new_tokens`, `temperature`, `top_k`, `top_p`, etc.
             See Hugging Face's documentation for more information:
-            - https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation
-            - https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig
+            - [customize-text-generation](https://huggingface.co/docs/transformers/main/en/generation_strategies#customize-text-generation)
+            - [GenerationConfig](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationConfig)
             The only generation_kwargs we set by default is max_new_tokens, which is set to 512 tokens.
         :param huggingface_pipeline_kwargs: Dictionary containing keyword arguments used to initialize the
             Hugging Face pipeline for text generation.
@@ -100,9 +106,7 @@ class HuggingFaceLocalChatGenerator:
             In case of duplication, these kwargs override `model`, `task`, `device`, and `token` init parameters.
             See Hugging Face's [documentation](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.pipeline.task)
             for more information on the available kwargs.
-            In this dictionary, you can also include `model_kwargs` to specify the kwargs
-            for model initialization:
-            https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained
+            In this dictionary, you can also include `model_kwargs` to specify the kwargs for [model initialization](https://huggingface.co/docs/transformers/en/main_classes/model#transformers.PreTrainedModel.from_pretrained)
         :param stop_words: A list of stop words. If any one of the stop words is generated, the generation is stopped.
             If you provide this parameter, you should not specify the `stopping_criteria` in `generation_kwargs`.
             For some chat models, the output includes both the new text and the original prompt.
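
As an illustration of how `generation_kwargs`, `huggingface_pipeline_kwargs`, and `stop_words` fit together, a hedged sketch; the model name and all values are examples, not defaults:

```python
from haystack.components.generators.chat import HuggingFaceLocalChatGenerator

generator = HuggingFaceLocalChatGenerator(
    model="HuggingFaceH4/zephyr-7b-beta",  # any chat model with a ChatML-style template
    generation_kwargs={"max_new_tokens": 256, "temperature": 0.7},  # passed through to generate()
    huggingface_pipeline_kwargs={"device_map": "auto"},  # forwarded to transformers.pipeline(...)
    stop_words=["Observation:"],  # generation halts if this string is produced
)
```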
@@ -169,12 +173,18 @@ class HuggingFaceLocalChatGenerator:
         return {"model": f"[object of type {type(self.huggingface_pipeline_kwargs['model'])}]"}

     def warm_up(self):
+        """
+        Initializes the component.
+        """
         if self.pipeline is None:
             self.pipeline = pipeline(**self.huggingface_pipeline_kwargs)

     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         serialization_dict = default_to_dict(
@@ -194,7 +204,12 @@ class HuggingFaceLocalChatGenerator:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "HuggingFaceLocalChatGenerator":
         """
-        Deserialize this component from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         torch_and_transformers_import.check()  # leave this, cls method
         deserialize_secrets_inplace(data["init_parameters"], keys=["token"])
@@ -214,7 +229,8 @@ class HuggingFaceLocalChatGenerator:
         :param messages: A list of ChatMessage instances representing the input messages.
         :param generation_kwargs: Additional keyword arguments for text generation.
-        :return: A list containing the generated responses as ChatMessage instances.
+        :returns:
+            A list containing the generated responses as ChatMessage instances.
         """
         if self.pipeline is None:
             raise RuntimeError("The generation model has not been loaded. Please call warm_up() before running.")
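
The guard above is why `warm_up()` must be called before `run()`. A short usage sketch, mirroring the docstring example (illustrative model and values):

```python
from haystack.components.generators.chat import HuggingFaceLocalChatGenerator
from haystack.dataclasses import ChatMessage

generator = HuggingFaceLocalChatGenerator(model="HuggingFaceH4/zephyr-7b-beta")
generator.warm_up()  # loads the transformers pipeline; run() raises RuntimeError otherwise
result = generator.run(
    [ChatMessage.from_user("What's Natural Language Processing? Be brief.")],
    generation_kwargs={"max_new_tokens": 64},  # per-call override of the __init__ defaults
)
print(result["replies"][0].content)
```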
@@ -287,7 +303,7 @@ class HuggingFaceLocalChatGenerator:
         :param tokenizer: The tokenizer used for generation.
         :param prompt: The prompt used for generation.
         :param generation_kwargs: The generation parameters.
-        :return: A ChatMessage instance.
+        :returns: A ChatMessage instance.
         """
         completion_tokens = len(tokenizer.encode(text, add_special_tokens=False))
         prompt_token_count = len(tokenizer.encode(prompt, add_special_tokens=False))

haystack/components/generators/chat/hugging_face_tgi.py

@@ -24,7 +24,28 @@ class HuggingFaceTGIChatGenerator:
     inference chat-based models deployed on the Text Generation Inference (TGI) backend.

     You can use this component for chat LLMs hosted on Hugging Face inference endpoints, the rate-limited
-    Inference API tier:
+    Inference API tier.
+
+    Key Features and Compatibility:
+    - Primary Compatibility: designed to work seamlessly with any chat-based model deployed using the TGI
+      framework. For more information on TGI, visit [text-generation-inference](https://github.com/huggingface/text-generation-inference)
+    - Hugging Face Inference Endpoints: Supports inference of TGI chat LLMs deployed on Hugging Face
+      inference endpoints. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints)
+    - Inference API Support: supports inference of TGI chat LLMs hosted on the rate-limited Inference
+      API tier. Learn more about the Inference API at [inference-api](https://huggingface.co/inference-api).
+      Discover available chat models using the following command: `wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat`
+      and simply use the model ID as the model parameter for this component. You'll also need to provide a valid
+      Hugging Face API token as the token parameter.
+    - Custom TGI Endpoints: supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
+      deploy their own TGI endpoint using the TGI framework. For more details, refer to [inference-endpoints](https://huggingface.co/inference-endpoints)
+
+    Input and Output Format:
+    - ChatMessage Format: This component uses the ChatMessage format to structure both input and output,
+      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+      ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).

     ```python
     from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
@@ -58,29 +79,6 @@ class HuggingFaceTGIChatGenerator:
     response = client.run(messages, generation_kwargs={"max_new_tokens": 120})
     print(response)
     ```
-
-    Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with any chat-based model deployed using the TGI
-      framework. For more information on TGI, visit https://github.com/huggingface/text-generation-inference.
-    - **Hugging Face Inference Endpoints**: Supports inference of TGI chat LLMs deployed on Hugging Face
-      inference endpoints. For more details, refer to https://huggingface.co/inference-endpoints.
-    - **Inference API Support**: Supports inference of TGI chat LLMs hosted on the rate-limited Inference
-      API tier. Learn more about the Inference API at https://huggingface.co/inference-api.
-      Discover available chat models using the following command:
-      ```
-      wget -qO- https://api-inference.huggingface.co/framework/text-generation-inference | grep chat
-      ```
-      and simply use the model ID as the model parameter for this component. You'll also need to provide a valid
-      Hugging Face API token as the token parameter.
-    - **Custom TGI Endpoints**: Supports inference of TGI chat LLMs deployed on custom TGI endpoints. Anyone can
-      deploy their own TGI endpoint using the TGI framework. For more details, refer
-      to https://huggingface.co/inference-endpoints.
-
-    Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format to structure both input and output,
-      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-      ChatMessage format can be found at https://github.com/openai/openai-python/blob/main/chatml.md.
     """

     def __init__(
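
For the "Custom TGI Endpoints" case described in the docstring, a hedged sketch; the URL is a placeholder for a self-hosted TGI server and the token is read from the environment:

```python
from haystack.components.generators.chat import HuggingFaceTGIChatGenerator
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret

client = HuggingFaceTGIChatGenerator(
    model="HuggingFaceH4/zephyr-7b-beta",  # the chat model served by the endpoint
    url="http://localhost:8080",  # placeholder: your own TGI deployment
    token=Secret.from_env_var("HF_API_TOKEN"),
)
client.warm_up()  # fetches the tokenizer used for prompt/completion token accounting
response = client.run(
    [ChatMessage.from_user("What's Natural Language Processing?")],
    generation_kwargs={"max_new_tokens": 120},
)
```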

haystack/components/generators/chat/openai.py

@@ -19,11 +19,11 @@ logger = logging.getLogger(__name__)
 @component
 class OpenAIChatGenerator:
     """
-    Enables text generation using OpenAI's large language models (LLMs). It supports gpt-4 and gpt-3.5-turbo
+    Enables text generation using OpenAI's large language models (LLMs). It supports `gpt-4` and `gpt-3.5-turbo`
     family of models accessed through the chat completions API endpoint.

     Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
-    directly to this component via the `**generation_kwargs` parameter in __init__ or the `**generation_kwargs`
+    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
     parameter in `run` method.

     For more details on the parameters supported by the OpenAI API, refer to the OpenAI
@@ -38,25 +38,29 @@ class OpenAIChatGenerator:
     client = OpenAIChatGenerator()
     response = client.run(messages)
     print(response)
-
-    >>{'replies': [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
-    >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
-    >>meaningful and useful.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
-    >>meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
-    >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
     ```
+
+    Output:
+    ```
+    {'replies':
+        [ChatMessage(content='Natural Language Processing (NLP) is a branch of artificial intelligence
+         that focuses on enabling computers to understand, interpret, and generate human language in
+         a way that is meaningful and useful.',
+         role=<ChatRole.ASSISTANT: 'assistant'>, name=None,
+         meta={'model': 'gpt-3.5-turbo-0613', 'index': 0, 'finish_reason': 'stop',
+         'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})
+        ]
+    }
+    ```

     Key Features and Compatibility:
-    - **Primary Compatibility**: Designed to work seamlessly with the OpenAI API Chat Completion endpoint
-      and gpt-4 and gpt-3.5-turbo family of models.
-    - **Streaming Support**: Supports streaming responses from the OpenAI API Chat Completion endpoint.
-    - **Customizability**: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
+    - Primary Compatibility: designed to work seamlessly with the OpenAI API Chat Completion endpoint and `gpt-4`
+      and `gpt-3.5-turbo` family of models.
+    - Streaming Support: supports streaming responses from the OpenAI API Chat Completion endpoint.
+    - Customizability: supports all parameters supported by the OpenAI API Chat Completion endpoint.

     Input and Output Format:
-    - **ChatMessage Format**: This component uses the ChatMessage format for structuring both input and output,
-      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-      ChatMessage format can be found at: https://github.com/openai/openai-python/blob/main/chatml.md.
+    - ChatMessage Format: this component uses the ChatMessage format for structuring both input and output,
+      ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
+      ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
     """

     def __init__(
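
Since the ChatMessage format carries both a role and content, a typical input looks like the following sketch: a system message to steer behaviour, followed by the user turn:

```python
from haystack.dataclasses import ChatMessage

messages = [
    ChatMessage.from_system("You are a helpful, concise assistant."),
    ChatMessage.from_user("What's Natural Language Processing?"),
]
```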
@@ -116,7 +120,9 @@ class OpenAIChatGenerator:
     def to_dict(self) -> Dict[str, Any]:
         """
         Serialize this component to a dictionary.
-        :return: The serialized component as a dictionary.
+
+        :returns:
+            The serialized component as a dictionary.
         """
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         return default_to_dict(
@@ -133,8 +139,10 @@ class OpenAIChatGenerator:
     def from_dict(cls, data: Dict[str, Any]) -> "OpenAIChatGenerator":
         """
         Deserialize this component from a dictionary.
+
         :param data: The dictionary representation of this component.
-        :return: The deserialized component instance.
+        :returns:
+            The deserialized component instance.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         init_params = data.get("init_parameters", {})
@@ -150,10 +158,12 @@ class OpenAIChatGenerator:
         :param messages: A list of ChatMessage instances representing the input messages.
         :param generation_kwargs: Additional keyword arguments for text generation. These parameters will
-            potentially override the parameters passed in the __init__ method.
-            For more details on the parameters supported by the OpenAI API, refer to the
-            OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
-        :return: A list containing the generated responses as ChatMessage instances.
+            potentially override the parameters passed in the `__init__` method.
+            For more details on the parameters supported by the OpenAI API, refer to the
+            OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
+        :returns:
+            A list containing the generated responses as ChatMessage instances.
         """

         # update generation kwargs by merging with the generation kwargs passed to the run method
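
A hedged sketch of that merge behaviour: values given to `run()` win over the `__init__` defaults, and a `streaming_callback` receives each chunk as it arrives. Assumes `OPENAI_API_KEY` is set in the environment:

```python
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.dataclasses import ChatMessage

client = OpenAIChatGenerator(
    generation_kwargs={"temperature": 0.7},  # default applied to every call
    streaming_callback=lambda chunk: print(chunk.content, end=""),  # prints tokens as they stream
)
response = client.run(
    [ChatMessage.from_user("What's Natural Language Processing?")],
    generation_kwargs={"temperature": 0.0, "max_tokens": 64},  # overrides the __init__ value
)
```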