6640bc050d
- Integrate chunking into LocalAI client to avoid GPU OOM on long audio.
- Split long files into overlapping chunks; transcribe each chunk; merge segments with corrected timestamps.
- Auto-enable chunking when audio duration > LOCALAI_MAX_SINGLE_REQUEST_DURATION (default 300s).
- Add env variables:
LOCALAI_CHUNK_DURATION (default 180)
LOCALAI_CHUNK_OVERLAP (default 2)
LOCALAI_MAX_SINGLE_REQUEST_DURATION (default 300)
- Add unit and integration tests for chunking logic.
- Confirmed working end-to-end with vibevoice-cpp-asr on 88-minute file.
87 lines
2.2 KiB
Python
87 lines
2.2 KiB
Python
import os
|
|
import subprocess
|
|
import tempfile
|
|
import pytest
|
|
|
|
from scraibe.audio import (
|
|
get_audio_duration,
|
|
split_audio_into_chunks,
|
|
)
|
|
|
|
# First sample media file used as input by the tests in this module.
# The path is relative, so it is resolved against the current working directory.
TEST_AUDIO_1 = "tests/audio_test_1.mp4"
|
|
# Second sample media file; only used through the parametrized path fixture.
# The path is relative, so it is resolved against the current working directory.
TEST_AUDIO_2 = "tests/audio_test_2.mp4"
|
|
|
|
|
|
@pytest.fixture(params=[TEST_AUDIO_1, TEST_AUDIO_2])
def test_audio_path(request):
    """Supply one sample media path per parametrized run.

    The fixture is parametrized over ``TEST_AUDIO_1`` and ``TEST_AUDIO_2``, so
    every test that requests it is executed once for each file.
    """
    sample_path = request.param
    return sample_path
|
|
|
|
|
|
def test_get_audio_duration(test_audio_path):
    """Check that a sample file's duration is reported as a positive float."""
    duration = get_audio_duration(test_audio_path)

    # Verify the type first so a wrong return type fails with a clear message.
    assert isinstance(duration, float)
    assert duration > 0
|
|
|
|
|
|
def test_split_audio_into_chunks_no_split_short(test_audio_path):
    """A file shorter than ``max_duration`` must come back as a single chunk.

    The single chunk is expected to point at the input file itself, start at
    0.0 and end (within 0.05) at the file's measured duration.
    """
    # max_duration is larger than both test files, so no split should happen.
    result = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=600.0,
        overlap=2.0,
    )
    assert len(result) == 1

    only_chunk = result[0]
    assert only_chunk["path"] == test_audio_path
    assert only_chunk["start"] == 0.0

    expected_end = get_audio_duration(test_audio_path)
    assert abs(only_chunk["end"] - expected_end) < 0.05
|
|
|
|
|
|
def test_split_audio_into_chunks_creates_chunks(tmp_path):
    """Force a split of ``TEST_AUDIO_1`` and validate the resulting chunks.

    A small ``max_duration`` is used so that more than one chunk is expected.
    Every chunk file must exist and be non-empty, and each chunk must start no
    earlier than its predecessor's start and before its predecessor's end
    (i.e. consecutive chunks overlap).

    ``tmp_path`` is requested but not forwarded: where the chunk files are
    written is left to ``split_audio_into_chunks``.
    """
    chunks = split_audio_into_chunks(
        input_path=TEST_AUDIO_1,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        assert len(chunks) > 1

        # Each chunk file exists and is non-empty.
        for chunk in chunks:
            assert os.path.exists(chunk["path"])
            assert os.path.getsize(chunk["path"]) > 0

        # Time ordering and overlap between adjacent chunks.
        for prev, curr in zip(chunks, chunks[1:]):
            assert curr["start"] >= prev["start"]
            assert curr["start"] < prev["end"]  # overlap
    finally:
        # Clean up even when an assertion above fails, so chunk files do not
        # leak. Never delete the input: when no split occurs the single
        # chunk's path is the input file (see the no-split test).
        source = os.path.abspath(TEST_AUDIO_1)
        for chunk in chunks:
            chunk_path = chunk["path"]
            if os.path.abspath(chunk_path) == source:
                continue
            if os.path.exists(chunk_path):
                os.remove(chunk_path)
|
|
|
|
|
|
def test_split_audio_into_chunks_total_coverage(test_audio_path):
    """Check that the chunks span the whole file from 0.0 to its duration.

    The first chunk must start at 0.0 and the last chunk must end no more
    than 0.05 before the measured duration of the input.
    """
    dur = get_audio_duration(test_audio_path)

    # Small chunks so that the input is likely to be split.
    chunks = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        # First chunk starts at 0.
        assert chunks[0]["start"] == 0.0

        # Last chunk end should cover the duration.
        assert chunks[-1]["end"] >= dur - 0.05
    finally:
        # Clean up even when an assertion above fails. The input itself must
        # be skipped: when no split occurs the single chunk's path is the
        # input file (see the no-split test), and removing it would destroy
        # the sample used by the other tests.
        source = os.path.abspath(test_audio_path)
        for chunk in chunks:
            chunk_path = chunk["path"]
            if os.path.abspath(chunk_path) == source:
                continue
            if os.path.exists(chunk_path):
                os.remove(chunk_path)
|