chore: adjust docstrings in the audio package (#7246)

* adjust docstrings in the audio package

* Apply suggestions from code review

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/audio/whisper_remote.py

* black complaining for apparently no reason

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
Massimiliano Pippi, 2024-02-28 18:22:03 +01:00, committed by GitHub
parent e5f0e248b6
commit 890c613a2c
2 changed files with 86 additions and 40 deletions

haystack/components/audio/whisper_local.py

@@ -20,11 +20,20 @@ WhisperLocalModel = Literal["tiny", "small", "medium", "large", "large-v2"]
 @component
 class LocalWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper's model on your local machine.
+    Transcribes audio files using OpenAI's Whisper model on your local machine.
 
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
-    [github repo](https://github.com/openai/whisper).
+    [GitHub repository](https://github.com/openai/whisper).
+
+    Usage example:
+    ```python
+    from haystack.components.audio import LocalWhisperTranscriber
+
+    whisper = LocalWhisperTranscriber(model="small")
+    whisper.warm_up()
+    transcription = whisper.run(sources=["path/to/audio/file"])
+    ```
     """
 
     def __init__(
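For context, a minimal end-to-end sketch of the pattern the new class docstring documents, based only on the signatures visible in this diff (the audio path is a placeholder):

```python
from haystack.components.audio import LocalWhisperTranscriber

whisper = LocalWhisperTranscriber(model="small")
whisper.warm_up()  # loads the checkpoint; run() raises ComponentError otherwise

# Per @component.output_types, run() returns a dictionary keyed by output name.
result = whisper.run(sources=["path/to/audio/file.mp3"])
for doc in result["documents"]:
    print(doc.content)  # the transcription text
    print(doc.meta)     # extra Whisper output, e.g. alignment data and the source path
```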
@@ -34,10 +43,14 @@ class LocalWhisperTranscriber:
         whisper_params: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param model: Name of the model to use. Set it to one of the following values:
-        :type model: Literal["tiny", "small", "medium", "large", "large-v2"]
-        :param device: The device on which the model is loaded. If `None`, the default device is automatically
-            selected.
+        Creates an instance of the LocalWhisperTranscriber component.
+
+        :param model:
+            Name of the model to use. Set it to one of the following values:
+        :type model:
+            Literal["tiny", "small", "medium", "large", "large-v2"]
+        :param device:
+            The device on which the model is loaded. If `None`, the default device is automatically selected.
         """
         whisper_import.check()
         if model not in get_args(WhisperLocalModel):
@@ -51,21 +64,29 @@ class LocalWhisperTranscriber:
     def warm_up(self) -> None:
         """
-        Loads the model.
+        Loads the model into memory.
         """
         if not self._model:
             self._model = whisper.load_model(self.model, device=self.device.to_torch())
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(self, model=self.model, device=self.device.to_dict(), whisper_params=self.whisper_params)
 
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "LocalWhisperTranscriber":
         """
-        Create a `LocalWhisperTranscriber` instance from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         serialized_device = data["init_parameters"]["device"]
         data["init_parameters"]["device"] = ComponentDevice.from_dict(serialized_device)
@@ -75,17 +96,19 @@ class LocalWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]], whisper_params: Optional[Dict[str, Any]] = None):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcription
+              text, while the document's metadata contains the values returned by the Whisper model, such as the
+              alignment data and the path to the audio file used for the transcription.
         """
         if self._model is None:
             raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
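The `whisper_params` argument in the `run()` signature above is forwarded to the underlying model for that call only; a sketch, assuming standard openai-whisper options such as `language`:

```python
# Pin the transcription language for this call without reconfiguring the component.
result = whisper.run(sources=["interview.wav"], whisper_params={"language": "en"})
```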
@@ -98,17 +121,16 @@ class LocalWhisperTranscriber:
     def transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> List[Document]:
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A list of Documents, one for each file.
         """
         transcriptions = self._raw_transcribe(sources, **kwargs)
         documents = []
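Unlike `run()`, `transcribe()` returns the Documents directly rather than a dictionary, as the reworked `:returns:` section now states:

```python
docs = whisper.transcribe(sources=["path/to/audio/file.mp3"])  # List[Document]
print(docs[0].content)
```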
@@ -120,14 +142,16 @@ class LocalWhisperTranscriber:
     def _raw_transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> Dict[Path, Any]:
         """
-        Transcribe the given audio files. Returns the output of the model, a dictionary, for each input file.
+        Transcribes the given audio files. Returns the output of the model, a dictionary, for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A dictionary of file_path -> transcription.
+        :param sources:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A dictionary mapping 'file_path' to 'transcription'.
         """
         if self._model is None:
             raise ComponentError("Model is not loaded, please run 'warm_up()' before calling 'run()'")
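For completeness, the private helper returns the raw per-file model output keyed by path, before it is wrapped into Documents; illustrative only, assuming `"text"` is present as in openai-whisper's standard output dictionary:

```python
raw = whisper._raw_transcribe(["path/to/audio/file.mp3"])
for path, output in raw.items():
    print(path, output["text"])  # unprocessed Whisper result for that file
```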

haystack/components/audio/whisper_remote.py

@@ -15,12 +15,20 @@ logger = logging.getLogger(__name__)
 @component
 class RemoteWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper using OpenAI API. Requires an API key. See the
-    [OpenAI blog post](https://beta.openai.com/docs/api-reference/whisper) for more details.
-    You can get one by signing up for an [OpenAI account](https://beta.openai.com/).
+    Transcribes audio files using the Whisper API from OpenAI.
+
+    The component requires an API key; see the
+    [OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
 
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text)
+
+    Usage example:
+    ```python
+    from haystack.components.audio import RemoteWhisperTranscriber
+    from haystack.utils import Secret
+
+    whisper = RemoteWhisperTranscriber(api_key=Secret.from_token("<your-api-key>"), model="whisper-1")
+    transcription = whisper.run(sources=["path/to/audio/file"])
+    ```
     """
 
     def __init__(
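A variant of the docstring example that avoids hard-coding the key; `Secret.from_env_var` and the `OPENAI_API_KEY` variable name are conventional Haystack/OpenAI usage, assumed here rather than taken from this diff:

```python
from haystack.components.audio import RemoteWhisperTranscriber
from haystack.utils import Secret

whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"), model="whisper-1")
result = whisper.run(sources=["path/to/audio/file.mp3"])
print(result["documents"][0].content)
```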
@@ -32,14 +40,19 @@ class RemoteWhisperTranscriber:
         **kwargs,
     ):
         """
-        Transcribes a list of audio files into a list of Documents.
+        Creates an instance of the RemoteWhisperTranscriber component.
 
-        :param api_key: OpenAI API key.
-        :param model: Name of the model to use. It now accepts only `whisper-1`.
-        :param organization: The Organization ID, defaults to `None`. See
+        :param api_key:
+            OpenAI API key.
+        :param model:
+            Name of the model to use. Currently, only `whisper-1` is accepted.
+        :param organization:
+            The Organization ID. See
             [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
-        :param api_base: An optional URL to use as the API base. Defaults to `None`. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
-        :param kwargs: Other parameters to use for the model. These parameters are all sent directly to the OpenAI
+        :param api_base:
+            An optional URL to use as the API base. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
+        :param kwargs:
+            Other parameters to use for the model. These parameters are all sent directly to the OpenAI
             endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
             Some of the supported parameters:
             - `language`: The language of the input audio.
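Because `kwargs` are sent directly to the OpenAI endpoint, Whisper API options can be pinned at construction time; a sketch using two parameters documented by the OpenAI audio API (`language`, `response_format`):

```python
whisper = RemoteWhisperTranscriber(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="whisper-1",
    language="en",           # forwarded verbatim to the Whisper API
    response_format="json",  # likewise
)
```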
@@ -77,9 +90,10 @@ class RemoteWhisperTranscriber:
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
-        This method overrides the default serializer in order to
-        avoid leaking the `api_key` value passed to the constructor.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(
             self,
@@ -93,7 +107,12 @@ class RemoteWhisperTranscriber:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber":
         """
-        Deserialize this component from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         return default_from_dict(cls, data)
@@ -101,10 +120,13 @@ class RemoteWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]]):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
-        :param sources: A list of file paths or ByteStreams containing the audio files to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text.
+        :param sources:
+            A list of file paths or ByteStreams containing the audio files to transcribe.
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcribed text.
         """
         documents = []