"""Tests for the duration and chunk-splitting helpers in ``scraibe.audio``."""

import os
import subprocess
import tempfile

import pytest

from scraibe.audio import (
    get_audio_duration,
    split_audio_into_chunks,
)

TEST_AUDIO_1 = "tests/audio_test_1.mp4"
TEST_AUDIO_2 = "tests/audio_test_2.mp4"


def _remove_chunk_files(chunks, keep=None):
    """Delete the files referenced by ``chunks``, never touching ``keep``.

    Args:
        chunks: Iterable of chunk dicts, each carrying a ``"path"`` key.
        keep: Path that must survive cleanup (the original input file).
            When no split happens the input itself is returned as the only
            chunk, so removing every path blindly would delete the fixture.
    """
    protected = os.path.abspath(keep) if keep is not None else None
    for chunk in chunks:
        path = chunk["path"]
        if protected is not None and os.path.abspath(path) == protected:
            continue
        if os.path.exists(path):
            os.remove(path)


@pytest.fixture(params=[TEST_AUDIO_1, TEST_AUDIO_2])
def test_audio_path(request):
    """Yield each bundled test media path in turn (parametrized fixture)."""
    return request.param


def test_get_audio_duration(test_audio_path):
    """The duration is reported as a strictly positive float."""
    dur = get_audio_duration(test_audio_path)
    assert isinstance(dur, float)
    assert dur > 0


def test_split_audio_into_chunks_no_split_short(test_audio_path):
    """A file shorter than ``max_duration`` comes back as one chunk: itself."""
    # For short files, should return the same file with no extra chunks
    chunks = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=600.0,  # larger than both test files
        overlap=2.0,
    )
    assert len(chunks) == 1
    assert chunks[0]["path"] == test_audio_path
    assert chunks[0]["start"] == 0.0
    dur = get_audio_duration(test_audio_path)
    assert abs(chunks[0]["end"] - dur) < 0.05


def test_split_audio_into_chunks_creates_chunks(tmp_path):
    """A small ``max_duration`` yields several ordered, overlapping chunks."""
    # NOTE(review): tmp_path is requested but not used by this test; it is
    # kept so the test's signature stays unchanged.
    # Use a small chunk duration to force splitting
    chunks = split_audio_into_chunks(
        input_path=TEST_AUDIO_1,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        assert len(chunks) > 1

        # Check that each chunk file exists and is non-empty
        for c in chunks:
            assert os.path.exists(c["path"])
            assert os.path.getsize(c["path"]) > 0

        # Check time ordering and overlap
        for prev, curr in zip(chunks, chunks[1:]):
            assert curr["start"] >= prev["start"]
            assert curr["start"] < prev["end"]  # overlap
    finally:
        # Clean up even when an assertion fails, and never delete the input.
        _remove_chunk_files(chunks, keep=TEST_AUDIO_1)


def test_split_audio_into_chunks_total_coverage(test_audio_path):
    """The chunks start at 0 and the last one reaches the end of the file."""
    dur = get_audio_duration(test_audio_path)
    # Use small chunks to ensure coverage
    chunks = split_audio_into_chunks(
        input_path=test_audio_path,
        max_duration=2.0,
        overlap=0.5,
    )
    try:
        # First chunk starts at 0
        assert chunks[0]["start"] == 0.0
        # Last chunk end should cover the duration
        assert chunks[-1]["end"] >= dur - 0.05
    finally:
        # Clean up even when an assertion fails, and never delete the input.
        _remove_chunk_files(chunks, keep=test_audio_path)