haystack/e2e/preview/components/test_whisper_remote.py
ZanSara f80ae01174
LocalWhisperTranscriber (v2) (#4909)
* original component

* remove remote parts

* unit tests

* polish docstrings

* fix unit tests

* fix e2e tests

* pylint

* remove check

* review feedback

* add type: ignore

* improve tests

* test stream handling

* upgrade canals and improve tests

* pylint
2023-05-22 18:30:35 +02:00

38 lines
1.4 KiB
Python

import os
import pytest
from haystack.preview.components.audio.whisper_remote import RemoteWhisperTranscriber
@pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY", None),
    reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
)
def test_whisper_remote_transcriber(preview_samples_path):
    """Check that RemoteWhisperTranscriber transcribes path, string and stream inputs.

    The test is skipped when the ``OPENAI_API_KEY`` environment variable is
    unset or empty. It feeds three sample audio files to the component, each
    through a different input type (a path object, an absolute path string and
    an open binary file object), then verifies for each resulting document the
    transcribed text and the ``audio_file`` metadata entry.

    :param preview_samples_path: pytest fixture; used here as the base
        directory containing an ``audio`` folder with the sample ``.wav`` files
        (NOTE(review): assumed to be a ``pathlib.Path`` since it is combined
        with ``/`` -- confirm against the fixture definition).
    """
    comp = RemoteWhisperTranscriber(api_key=os.environ.get("OPENAI_API_KEY"))

    # Build each input once so the values handed to the component are exactly
    # the ones compared against the returned metadata below.
    audio_dir = preview_samples_path / "audio"
    path_input = audio_dir / "this is the content of the document.wav"
    str_input = str((audio_dir / "the context for this answer is here.wav").absolute())

    # Open the stream in a context manager so the file handle is always
    # released, even if the transcription call raises.
    with open(audio_dir / "answer.wav", "rb") as audio_stream:
        output = comp.run(
            RemoteWhisperTranscriber.Input(
                audio_files=[
                    path_input,
                    str_input,
                    audio_stream,
                ]
            )
        )

    docs = output.documents
    assert len(docs) == 3

    # Input given as a path object: metadata holds the same path object value.
    assert "this is the content of the document." == docs[0].content.strip().lower()
    assert path_input == docs[0].metadata["audio_file"]

    # Input given as an absolute path string: metadata holds the same string.
    assert "the context for this answer is here." == docs[1].content.strip().lower()
    assert str_input == docs[1].metadata["audio_file"]

    # Input given as a binary stream: metadata holds a fixed placeholder.
    assert "answer." == docs[2].content.strip().lower()
    assert "<<binary stream>>" == docs[2].metadata["audio_file"]