Files
scribe/tests/test_audio_chunking.py
T
admin 6640bc050d
Mirror and run GitLab CI / build (push) Waiting to run
Ruff / ruff (push) Waiting to run
feat: add chunked ASR for long audio with env-configurable chunk duration
- Integrate chunking into LocalAI client to avoid GPU OOM on long audio.
- Split long files into overlapping chunks; transcribe each chunk; merge segments with corrected timestamps.
- Auto-enable chunking when audio duration > LOCALAI_MAX_SINGLE_REQUEST_DURATION (default 300s).
- Add env variables:
    LOCALAI_CHUNK_DURATION (default 180)
    LOCALAI_CHUNK_OVERLAP (default 2)
    LOCALAI_MAX_SINGLE_REQUEST_DURATION (default 300)
- Add unit and integration tests for chunking logic.
- Confirmed working end-to-end with vibevoice-cpp-asr on 88-minute file.
2026-06-18 17:46:29 +00:00

87 lines
2.2 KiB
Python

import os
import subprocess
import tempfile
import pytest
from scraibe.audio import (
get_audio_duration,
split_audio_into_chunks,
)
# Sample recordings used as inputs by the tests in this module. The paths are
# relative, so they resolve against the process working directory.
# NOTE(review): presumably pytest is meant to be launched from the directory
# that contains `tests/` — confirm against the CI configuration.
TEST_AUDIO_1 = "tests/audio_test_1.mp4"
TEST_AUDIO_2 = "tests/audio_test_2.mp4"
@pytest.fixture(params=[TEST_AUDIO_1, TEST_AUDIO_2])
def test_audio_path(request):
    """Provide each sample audio path in turn, one per parametrized run."""
    audio_path = request.param
    return audio_path
def test_get_audio_duration(test_audio_path):
    """Check that the reported duration of a sample file is a positive float."""
    duration = get_audio_duration(test_audio_path)

    assert isinstance(duration, float)
    assert 0 < duration
def test_split_audio_into_chunks_no_split_short(test_audio_path):
    """A file shorter than ``max_duration`` comes back as a single chunk.

    The lone chunk must point at the input file itself, start at 0.0 and end
    at the file's duration (within 50 ms).
    """
    # Per the original author, 600 s is longer than both sample files, so no
    # splitting is expected here.
    limit_seconds = 600.0
    tolerance = 0.05

    chunks = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=limit_seconds,
        overlap=2.0,
    )
    assert len(chunks) == 1

    only_chunk = chunks[0]
    assert only_chunk["path"] == test_audio_path
    assert only_chunk["start"] == 0.0

    full_duration = get_audio_duration(test_audio_path)
    assert abs(only_chunk["end"] - full_duration) < tolerance
def test_split_audio_into_chunks_creates_chunks(tmp_path):
    """Force a split with a 2 s chunk size and validate the produced chunks.

    Verifies that more than one chunk is returned, that every chunk file
    exists and is non-empty, and that consecutive chunks are ordered by start
    time and overlap (each chunk starts before the previous one ends).

    ``tmp_path`` is kept in the signature for compatibility; it is not used
    by the test body.

    Chunk files are removed in a ``finally`` clause so that a failing
    assertion does not leave them on disk.
    """
    # Use a small chunk duration to force splitting.
    chunks = split_audio_into_chunks(
        input_path=TEST_AUDIO_1,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        assert len(chunks) > 1

        # Each chunk file must exist and be non-empty.
        for chunk in chunks:
            assert os.path.exists(chunk["path"])
            assert os.path.getsize(chunk["path"]) > 0

        # Time ordering and overlap between neighbouring chunks.
        for prev, curr in zip(chunks, chunks[1:]):
            assert curr["start"] >= prev["start"]
            assert curr["start"] < prev["end"]  # overlap
    finally:
        # Cleanup runs even when an assertion above fails. Never delete the
        # source recording: if no split happened, the returned chunk may be
        # the input file itself.
        source = os.path.abspath(TEST_AUDIO_1)
        for chunk in chunks:
            chunk_path = chunk["path"]
            if os.path.abspath(chunk_path) == source:
                continue
            if os.path.exists(chunk_path):
                os.remove(chunk_path)
def test_split_audio_into_chunks_total_coverage(test_audio_path):
    """Chunks produced with a 2 s limit must span the whole recording.

    The first chunk has to start at 0.0 and the last chunk has to end at the
    file's duration (a 50 ms shortfall is tolerated).

    Chunk files are removed in a ``finally`` clause so that a failing
    assertion does not leave them on disk. The input file is never removed,
    even if it is returned as the only chunk.
    """
    duration = get_audio_duration(test_audio_path)

    # Use small chunks to ensure coverage.
    chunks = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        # First chunk starts at 0.
        assert chunks[0]["start"] == 0.0
        # Last chunk end should cover the duration.
        assert chunks[-1]["end"] >= duration - 0.05
    finally:
        # If the sample is not longer than the chunk limit, the splitter may
        # hand back the input path itself; deleting it would destroy the
        # sample file for every later test.
        source = os.path.abspath(test_audio_path)
        for chunk in chunks:
            chunk_path = chunk["path"]
            if os.path.abspath(chunk_path) == source:
                continue
            if os.path.exists(chunk_path):
                os.remove(chunk_path)