Mirror of https://github.com/deepset-ai/haystack.git, synced 2025-09-20 13:43:34 +00:00.
chore: adjust docstrings in the audio package (#7246)
* adjust docstrings in the audio package

* Apply suggestions from code review

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>

* Update haystack/components/audio/whisper_remote.py

* black complaining for apparently no reason

---------

Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
commit 890c613a2c
parent e5f0e248b6
haystack/components/audio/whisper_local.py

@@ -20,11 +20,20 @@ WhisperLocalModel = Literal["tiny", "small", "medium", "large", "large-v2"]
 @component
 class LocalWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper's model on your local machine.
+    Transcribes audio files using OpenAI's Whisper model in your local machine.
 
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
-    [github repo](https://github.com/openai/whisper).
+    [github repository](https://github.com/openai/whisper).
+
+    Usage example:
+    ```python
+    from haystack.components.audio import LocalWhisperTranscriber
+
+    whisper = LocalWhisperTranscriber(model="small")
+    whisper.warm_up()
+    transcription = whisper.run(audio_files=["path/to/audio/file"])
+    ```
     """
 
     def __init__(
@@ -34,10 +43,14 @@ class LocalWhisperTranscriber:
         whisper_params: Optional[Dict[str, Any]] = None,
     ):
         """
-        :param model: Name of the model to use. Set it to one of the following values:
-        :type model: Literal["tiny", "small", "medium", "large", "large-v2"]
-        :param device: The device on which the model is loaded. If `None`, the default device is automatically
-            selected.
+        Creates an instance of the LocalWhisperTranscriber component.
+
+        :param model:
+            Name of the model to use. Set it to one of the following values:
+        :type model:
+            Literal["tiny", "small", "medium", "large", "large-v2"]
+        :param device:
+            The device on which the model is loaded. If `None`, the default device is automatically selected.
         """
         whisper_import.check()
         if model not in get_args(WhisperLocalModel):
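For context, here is how the documented `model` and `device` parameters come together when constructing the component. This is a minimal sketch, assuming Haystack's `ComponentDevice.from_str` helper and its import path; neither appears in this diff.

```python
from haystack.components.audio import LocalWhisperTranscriber
from haystack.utils import ComponentDevice  # assumed import path

# Pick one of the documented model sizes and pin the device explicitly;
# passing device=None instead lets Haystack select a default device.
whisper = LocalWhisperTranscriber(
    model="medium",
    device=ComponentDevice.from_str("cuda:0"),  # assumption: device-string helper, not shown in this diff
)
whisper.warm_up()  # loads the Whisper weights in memory before the first run
```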
@@ -51,21 +64,29 @@ class LocalWhisperTranscriber:
 
     def warm_up(self) -> None:
         """
-        Loads the model.
+        Loads the model in memory.
         """
         if not self._model:
             self._model = whisper.load_model(self.model, device=self.device.to_torch())
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(self, model=self.model, device=self.device.to_dict(), whisper_params=self.whisper_params)
 
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "LocalWhisperTranscriber":
         """
-        Create a `LocalWhisperTranscriber` instance from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         serialized_device = data["init_parameters"]["device"]
         data["init_parameters"]["device"] = ComponentDevice.from_dict(serialized_device)
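The `to_dict`/`from_dict` pair documented above supports a round trip. A sketch of what that implies, relying on the `init_parameters` layout that `default_to_dict` produces (visible in the `from_dict` body above):

```python
from haystack.components.audio import LocalWhisperTranscriber

whisper = LocalWhisperTranscriber(model="medium")
data = whisper.to_dict()  # captures model, device, and whisper_params
assert data["init_parameters"]["model"] == "medium"

# Rebuild the component from the dictionary; the device is deserialized first.
restored = LocalWhisperTranscriber.from_dict(data)
restored.warm_up()  # the model weights are not serialized, so load them again
```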
@@ -75,17 +96,19 @@ class LocalWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]], whisper_params: Optional[Dict[str, Any]] = None):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param audio_files:
+            A list of paths or binary streams to transcribe.
+
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcription text,
+              while the document's metadata contains the values returned by the Whisper model, such as the
+              alignment data and the path to the audio file used for the transcription.
         """
         if self._model is None:
             raise ComponentError("The component was not warmed up. Run 'warm_up()' before calling 'run()'.")
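Given the return shape documented above for `run`, consuming the output would look like this sketch. Note that the class-level usage example passes `audio_files=`, while the signature here takes `sources=`; the sketch follows the signature.

```python
from haystack.components.audio import LocalWhisperTranscriber

whisper = LocalWhisperTranscriber(model="small")
whisper.warm_up()
result = whisper.run(sources=["path/to/audio/file.mp3"])

for doc in result["documents"]:
    print(doc.content)  # the transcription text
    print(doc.meta)     # remaining Whisper output, e.g. alignment data and the audio file path
```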
@@ -98,17 +121,16 @@ class LocalWhisperTranscriber:
 
     def transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> List[Document]:
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text,
-            while the document's metadata contains all the other values returned by the Whisper model, such as the
-            alignment data. Another key called `audio_file` contains the path to the audio file used for the
-            transcription.
+        :param audio_files:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A list of Documents, one for each file.
         """
         transcriptions = self._raw_transcribe(sources, **kwargs)
         documents = []
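Unlike `run`, `transcribe` returns the list of Documents directly. A sketch, continuing with the warmed-up component from the previous example and assuming extra keyword arguments are forwarded to Whisper via `_raw_transcribe(sources, **kwargs)` as the signatures suggest (`language` is a standard Whisper option, not shown in this diff):

```python
# Returns the Documents directly instead of a {"documents": ...} dictionary.
documents = whisper.transcribe(sources=["path/to/audio/file.mp3"], language="en")  # language: assumed Whisper kwarg
print(documents[0].content)  # the transcription text for the first file
```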
@@ -120,14 +142,16 @@ class LocalWhisperTranscriber:
 
     def _raw_transcribe(self, sources: List[Union[str, Path, ByteStream]], **kwargs) -> Dict[Path, Any]:
         """
-        Transcribe the given audio files. Returns the output of the model, a dictionary, for each input file.
+        Transcribes the given audio files. Returns the output of the model, a dictionary, for each input file.
 
         For the supported audio formats, languages, and other parameters, see the
         [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text) and the official Whisper
         [github repo](https://github.com/openai/whisper).
 
-        :param audio_files: A list of paths or binary streams to transcribe.
-        :returns: A dictionary of file_path -> transcription.
+        :param audio_files:
+            A list of paths or binary streams to transcribe.
+        :returns:
+            A dictionary mapping 'file_path' to 'transcription'.
         """
         if self._model is None:
             raise ComponentError("Model is not loaded, please run 'warm_up()' before calling 'run()'")
haystack/components/audio/whisper_remote.py

@@ -15,12 +15,20 @@ logger = logging.getLogger(__name__)
 @component
 class RemoteWhisperTranscriber:
     """
-    Transcribes audio files using OpenAI's Whisper using OpenAI API. Requires an API key. See the
-    [OpenAI blog post](https://beta.openai.com/docs/api-reference/whisper) for more details.
-    You can get one by signing up for an [OpenAI account](https://beta.openai.com/).
+    Transcribes audio files using the Whisper API from OpenAI.
 
+    The component requires an API key, see the relative
+    [OpenAI documentation](https://platform.openai.com/docs/api-reference/authentication) for more details.
     For the supported audio formats, languages, and other parameters, see the
     [Whisper API documentation](https://platform.openai.com/docs/guides/speech-to-text)
+
+    Usage example:
+    ```python
+    from haystack.components.audio import RemoteWhisperTranscriber
+
+    whisper = RemoteWhisperTranscriber(api_key=Secret.from_token("<your-api-key>"), model="tiny")
+    transcription = whisper.run(sources=["path/to/audio/file"])
+    ```
    """
 
     def __init__(
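The docstring example above hard-codes a token via `Secret.from_token`; in practice the key usually comes from the environment. A sketch, assuming Haystack's `Secret.from_env_var` helper and its import path (not shown in this diff). It also omits `model`, since per the `__init__` docstring below only `whisper-1` is accepted, which makes the example's `model="tiny"` questionable:

```python
from haystack.components.audio import RemoteWhisperTranscriber
from haystack.utils import Secret  # assumed import path

# Reads OPENAI_API_KEY from the environment instead of hard-coding the token.
whisper = RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY"))
result = whisper.run(sources=["path/to/audio/file.mp3"])
```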
@@ -32,14 +40,19 @@ class RemoteWhisperTranscriber:
         **kwargs,
     ):
         """
-        Transcribes a list of audio files into a list of Documents.
+        Creates an instance of the RemoteWhisperTranscriber component.
 
-        :param api_key: OpenAI API key.
-        :param model: Name of the model to use. It now accepts only `whisper-1`.
-        :param organization: The Organization ID, defaults to `None`. See
+        :param api_key:
+            OpenAI API key.
+        :param model:
+            Name of the model to use. It now accepts only `whisper-1`.
+        :param organization:
+            The Organization ID. See
         [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
-        :param api_base: An optional URL to use as the API base. Defaults to `None`. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
-        :param kwargs: Other parameters to use for the model. These parameters are all sent directly to the OpenAI
+        :param api_base:
+            An optional URL to use as the API base. See OpenAI [docs](https://platform.openai.com/docs/api-reference/audio).
+        :param kwargs:
+            Other parameters to use for the model. These parameters are all sent directly to the OpenAI
         endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/audio) for more details.
         Some of the supported parameters:
         - `language`: The language of the input audio.
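Since `kwargs` is forwarded verbatim to the OpenAI endpoint, model parameters are set at construction time rather than per call. A sketch: `language` is listed in the docstring above, while `response_format` is an assumption based on the OpenAI audio API documentation:

```python
whisper = RemoteWhisperTranscriber(
    api_key=Secret.from_env_var("OPENAI_API_KEY"),
    model="whisper-1",
    language="en",           # forwarded directly to the endpoint, per the docstring
    response_format="json",  # assumption: a parameter supported by the OpenAI audio API
)
```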
@@ -77,9 +90,10 @@ class RemoteWhisperTranscriber:
 
     def to_dict(self) -> Dict[str, Any]:
         """
-        Serialize this component to a dictionary.
-        This method overrides the default serializer in order to
-        avoid leaking the `api_key` value passed to the constructor.
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
         """
         return default_to_dict(
             self,
@@ -93,7 +107,12 @@ class RemoteWhisperTranscriber:
     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "RemoteWhisperTranscriber":
         """
-        Deserialize this component from a dictionary.
+        Deserializes the component from a dictionary.
+
+        :param data:
+            The dictionary to deserialize from.
+        :returns:
+            The deserialized component.
         """
         deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
         return default_from_dict(cls, data)
@@ -101,10 +120,13 @@ class RemoteWhisperTranscriber:
     @component.output_types(documents=List[Document])
     def run(self, sources: List[Union[str, Path, ByteStream]]):
         """
-        Transcribe the audio files into a list of Documents, one for each input file.
+        Transcribes the audio files into a list of Documents, one for each input file.
 
-        :param sources: A list of file paths or ByteStreams containing the audio files to transcribe.
-        :returns: A list of Documents, one for each file. The content of the document is the transcription text.
+        :param sources:
+            A list of file paths or ByteStreams containing the audio files to transcribe.
+
+        :returns: A dictionary with the following keys:
+            - `documents`: A list of Documents, one for each file. The content of the document is the transcribed text.
         """
         documents = []
 
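Because `run` emits `documents`, the component slots into a pipeline like any other producer of Documents. A hypothetical wiring sketch, assuming the standard Haystack 2.x `Pipeline`, `DocumentWriter`, and `InMemoryDocumentStore` (none of which appear in this diff):

```python
from haystack import Pipeline
from haystack.components.audio import RemoteWhisperTranscriber
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.utils import Secret

store = InMemoryDocumentStore()
pipeline = Pipeline()
pipeline.add_component("transcriber", RemoteWhisperTranscriber(api_key=Secret.from_env_var("OPENAI_API_KEY")))
pipeline.add_component("writer", DocumentWriter(document_store=store))
pipeline.connect("transcriber.documents", "writer.documents")

# One Document per audio file ends up in the store; its content is the transcribed text.
pipeline.run({"transcriber": {"sources": ["path/to/audio/file.mp3"]}})
```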