mirror of
https://github.com/docling-project/docling.git
synced 2025-06-27 05:20:05 +00:00
60 lines
2.0 KiB
Python
60 lines
2.0 KiB
Python
![]() |
from pathlib import Path
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from docling.datamodel import asr_model_specs
|
||
|
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
||
|
from docling.datamodel.document import ConversionResult
|
||
|
from docling.datamodel.pipeline_options import AsrPipelineOptions
|
||
|
from docling.document_converter import AudioFormatOption, DocumentConverter
|
||
|
from docling.pipeline.asr_pipeline import AsrPipeline
|
||
|
|
||
|
|
||
|
@pytest.fixture
def test_audio_path():
    """Provide the location of the sample audio clip used by the ASR test.

    Returns:
        A relative ``Path`` pointing at ``tests/data/audio/sample_10s.mp3``.
    """
    sample_audio = Path("./tests/data/audio/sample_10s.mp3")
    return sample_audio
|
||
|
|
||
|
|
||
|
def get_asr_converter():
    """Create a DocumentConverter that handles audio input via the ASR pipeline.

    The pipeline options are configured with the
    ``asr_model_specs.WHISPER_TINY`` model spec.

    Returns:
        A ``DocumentConverter`` whose ``InputFormat.AUDIO`` format option uses
        ``AsrPipeline`` together with the options built here.
    """
    asr_pipeline_options = AsrPipelineOptions()
    # NOTE(review): presumably the tiny model is chosen to keep the test
    # lightweight -- confirm before switching to a larger spec.
    asr_pipeline_options.asr_options = asr_model_specs.WHISPER_TINY

    audio_format_option = AudioFormatOption(
        pipeline_cls=AsrPipeline,
        pipeline_options=asr_pipeline_options,
    )
    return DocumentConverter(
        format_options={InputFormat.AUDIO: audio_format_option},
    )
|
||
|
|
||
|
|
||
|
def test_asr_pipeline_conversion(test_audio_path):
    """Check that the ASR pipeline converts the sample audio into text.

    The audio file comes from the ``test_audio_path`` fixture and the
    converter from ``get_asr_converter()``. The test asserts that the file
    exists, that conversion reports ``ConversionStatus.SUCCESS``, that a
    document is produced, and that the document has at least one text item.

    Args:
        test_audio_path: Path to the audio file to transcribe.
    """
    # Fail with an explicit message if the sample audio is missing, rather
    # than letting the converter raise a less obvious error.
    assert test_audio_path.exists(), f"Test audio file not found: {test_audio_path}"

    # Run the conversion on the audio file.
    doc_result: ConversionResult = get_asr_converter().convert(test_audio_path)

    # The conversion must succeed and must yield a document.
    assert doc_result.status == ConversionStatus.SUCCESS, (
        f"Conversion failed with status: {doc_result.status}"
    )
    assert doc_result.document is not None, "No document was created"

    # The transcription must contain at least one text item.
    texts = doc_result.document.texts
    assert len(texts) > 0, "No text content found in transcribed audio"

    # Print the transcription as a debugging aid (shown by pytest with -s or
    # on failure).
    print(f"Transcribed text from {test_audio_path.name}:")
    for position, text_item in enumerate(texts, start=1):
        print(f" {position}: {text_item.text}")
|