chore: adjust docstrings in the audio package (#7246)

* adjust docstrings in the audio package

* Apply suggestions from code review

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/audio/whisper_remote.py

* black complaining for apparently no reason

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Massimiliano Pippi, 2024-02-28 18:22:03 +01:00, committed by GitHub
parent e5f0e248b6
commit 890c613a2c
2 changed files with 86 additions and 40 deletions

haystack/components/audio/whisper_local.py

@@ -20,11 +20,20 @@ WhisperLocalModel = Literal["tiny", "small", "medium", "large", "large-v2"]
 @component
 class LocalWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper's model on your local machine.
+    Transcribes audio files using OpenAI's Whisper model on your local machine.
 
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
-    [github repo](https://github.com/openai/whisper).
+    [GitHub repository](https://github.com/openai/whisper).
+
+    Usage example:
+    ```python
+    from haystack.components.audio import LocalWhisperTranscriber
+
+    whisper = LocalWhisperTranscriber(model="small")
+    whisper.warm_up()
+    transcription = whisper.run(sources=["path/to/audio/file"])
+    ```
     """
 
     def __init__(
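For context, a minimal end-to-end sketch of the pattern the new class docstring documents, based only on the signatures visible in this diff (the audio path is a placeholder):

```python
from haystack.components.audio import LocalWhisperTranscriber

whisper = LocalWhisperTranscriber(model="small")
whisper.warm_up()  # loads the checkpoint; run() raises ComponentError otherwise

# Per @component.output_types, run() returns a dictionary keyed by output name.
result = whisper.run(sources=["path/to/audio/file.mp3"])
for doc in result["documents"]:
    print(doc.content)  # the transcription text
    print(doc.meta)     # extra Whisper output, e.g. alignment data and the source path
```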
@@ -34,10 +43,14 @@ class LocalWhisperTranscriber:
         whisper_params: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param model: Name of the model to use. Set it to one of the following values:
-        :type model: Literal["tiny", "small", "medium", "large", "large-v2"]
-        :param device: The device on which the model is loaded. If `None`, the default device is automatically
-            selected.
+        Creates an instance of the LocalWhisperTranscriber component.
+
+        :param model:
+            Name of the model to use. Set it to one of the following values:
+        :type model:
+            Literal["tiny", "small", "medium", "large", "large-v2"]
+        :param device:
+            The device on which the model is loaded. If `None`, the default device is automatically selected.
         """
         whisper_import.check()
         if model not in get_args(WhisperLocalModel):
@@ -51,21 +64,29 @@ class LocalWhisperTranscriber:
     def warm_up(self) -> None:
         """
-        Loads the model.
+        Loads the model into memory.
         """
         if not self._model:
             self._model = whisper.load_model(self.model, device=self.device.to_torch())
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(self, model=self.model, device=self.device.to_dict(), whisper_params=self.whisper_params)
 
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "LocalWhisperTranscriber":
         """
-        Create a `LocalWhisperTranscriber` instance from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         serialized_device = data["init_parameters"]["device"]
         data["init_parameters"]["device"] = ComponentDevice.from_dict(serialized_device)
@@ -75,17 +96,19 @@ class LocalWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]], whisper_params: Optional[Dict[str, Any]] = None):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcription
+              text, while the document's metadata contains the values returned by the Whisper model, such as the
+              alignment data and the path to the audio file used for the transcription.
         """
         if self._model is None:
             raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
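The `whisper_params` argument in the `run()` signature above is forwarded to the underlying model for that call only; a sketch, assuming standard openai-whisper options such as `language`:

```python
# Pin the transcription language for this call without reconfiguring the component.
result = whisper.run(sources=["interview.wav"], whisper_params={"language": "en"})
```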
@@ -98,17 +121,16 @@ class LocalWhisperTranscriber:
     def transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> List[Document]:
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A list of Documents, one for each file.
         """
         transcriptions = self._raw_transcribe(sources, **kwargs)
         documents = []
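Unlike `run()`, `transcribe()` returns the Documents directly rather than a dictionary, as the reworked `:returns:` section now states:

```python
docs = whisper.transcribe(sources=["path/to/audio/file.mp3"])  # List[Document]
print(docs[0].content)
```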
@@ -120,14 +142,16 @@ class LocalWhisperTranscriber:
     def _raw_transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> Dict[Path, Any]:
         """
-        Transcribe the given audio files. Returns the output of the model, a dictionary, for each input file.
+        Transcribes the given audio files. Returns the output of the model, a dictionary, for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A dictionary of file_path -> transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A dictionary mapping 'file_path' to 'transcription'.
         """
         if self._model is None:
             raise ComponentError("Model is not loaded, please run 'warm_up()' before calling 'run()'")
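For completeness, the private helper returns the raw per-file model output keyed by path, before it is wrapped into Documents; illustrative only, assuming `"text"` is present as in openai-whisper's standard output dictionary:

```python
raw = whisper._raw_transcribe(["path/to/audio/file.mp3"])
for path, output in raw.items():
    print(path, output["text"])  # unprocessed Whisper result for that file
```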

haystack/components/audio/whisper_remote.py

@@ -15,12 +15,20 @@ logger = logging.getLogger(__name__)
 @component
 class RemoteWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper using OpenAI API. Requires an API key. See the
-    [OpenAI blog post](https://beta.openai.com/docs/api-reference/whisper) for more details.
-    You can get one by signing up for an [OpenAI account](https://beta.openai.com/).
+    Transcribes audio files using the Whisper API from OpenAI.
+
+    The component requires an API key; see the
+    [OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
 
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text)
+
+    Usage example:
+    ```python
+    from haystack.components.audio import RemoteWhisperTranscriber
+    from haystack.utils import Secret
+
+    whisper = RemoteWhisperTranscriber(api_key=Secret.from_token("<your-api-key>"), model="whisper-1")
+    transcription = whisper.run(sources=["path/to/audio/file"])
+    ```
     """
 
     def __init__(
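A variant of the docstring example that avoids hard-coding the key; `Secret.from_env_var` and the `OPENAI_API_KEY` variable name are conventional Haystack/OpenAI usage, assumed here rather than taken from this diff:

```python
from haystack.components.audio import RemoteWhisperTranscriber
from haystack.utils import Secret

whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="whisper-1")
result = whisper.run(sources=["path/to/audio/file.mp3"])
print(result["documents"][0].content)
```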
@@ -32,14 +40,19 @@ class RemoteWhisperTranscriber:
         **kwargs,
     ):
         """
-        Transcribes a list of audio files into a list of Documents.
+        Creates an instance of the RemoteWhisperTranscriber component.
 
-        :param api_key: OpenAI API key.
-        :param model: Name of the model to use. It now accepts only `whisper-1`.
-        :param organization: The Organization ID, defaults to `None`. See
+        :param api_key:
+            OpenAI API key.
+        :param model:
+            Name of the model to use. Currently, only `whisper-1` is accepted.
+        :param organization:
+            The Organization ID. See
             [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
-        :param api_base: An optional URL to use as the API base. Defaults to `None`. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
-        :param kwargs: Other parameters to use for the model. These parameters are all sent directly to the OpenAI
+        :param api_base:
+            An optional URL to use as the API base. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
+        :param kwargs:
+            Other parameters to use for the model. These parameters are all sent directly to the OpenAI
             endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
             Some of the supported parameters:
             - `language`: The language of the input audio.
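Because `kwargs` are sent directly to the OpenAI endpoint, Whisper API options can be pinned at construction time; a sketch using two parameters documented by the OpenAI audio API (`language`, `response_format`):

```python
whisper = RemoteWhisperTranscriber(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="whisper-1",
    language="en",           # forwarded verbatim to the Whisper API
    response_format="json",  # likewise
)
```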
@@ -77,9 +90,10 @@ class RemoteWhisperTranscriber:
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
-        This method overrides the default serializer in order to
-        avoid leaking the `api_key` value passed to the constructor.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(
             self,
@@ -93,7 +107,12 @@ class RemoteWhisperTranscriber:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber":
         """
-        Deserialize this component from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         return default_from_dict(cls, data)
@@ -101,10 +120,13 @@ class RemoteWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]]):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
-        :param sources: A list of file paths or ByteStreams containing the audio files to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text.
+        :param sources:
+            A list of file paths or ByteStreams containing the audio files to transcribe.
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcribed text.
         """
         documents = []