added tests folder
This commit is contained in:
@@ -0,0 +1,96 @@
|
|||||||
|
import pytest
|
||||||
|
from scraibe.audio import AudioProcessor
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
|
TEST_WAVEFORM = torch.sin(torch.randn(160000)).to(DEVICE)
|
||||||
|
TEST_SR = 16000
|
||||||
|
SAMPLE_RATE = 16000
|
||||||
|
NORMALIZATION_FACTOR = 32768
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def probe_audio_processor():
|
||||||
|
"""Fixture for creating an instance of the AudioProcessor class with test waveform and sample rate.
|
||||||
|
|
||||||
|
This fixture is used to create an instance of the AudioProcessor class with a predfined test waveform and sample rate (TEST_SR). It returns the instantiated AudioProcessor , which can bes used as a
|
||||||
|
dependency in other test functions.
|
||||||
|
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
AudioProcessor (obj): An instance of the AudioProcessor class with the test waveform and sample rate.
|
||||||
|
"""
|
||||||
|
return AudioProcessor(TEST_WAVEFORM, TEST_SR)
|
||||||
|
|
||||||
|
|
||||||
|
def test_AudioProcessor_init(probe_audio_processor):
|
||||||
|
"""
|
||||||
|
Test the initialization of the AudioProcessor class.
|
||||||
|
|
||||||
|
This test verifies that the AUdioProcessor class is correctly initialized with the provided waveform and sample rate. It checks whether the instantiated AhdioProcessor object has the correct attributes
|
||||||
|
and whether the waveform and sample rate match the expected values.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
probe_audio_processor (obj): An instance of the AudioProcessor class to be tested.
|
||||||
|
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert isinstance(probe_audio_processor, AudioProcessor)
|
||||||
|
assert probe_audio_processor.waveform.device == TEST_WAVEFORM.device
|
||||||
|
assert torch.equal(probe_audio_processor.waveform, TEST_WAVEFORM)
|
||||||
|
assert probe_audio_processor.sr == TEST_SR
|
||||||
|
|
||||||
|
|
||||||
|
def test_cut(probe_audio_processor):
|
||||||
|
"""Test the cut function of the AudioProcessor class.
|
||||||
|
|
||||||
|
This test verifies that the cut function correctly extracts a segment of audio data from
|
||||||
|
the waveform, given start and end indices. It checks whether the size of the extracted segment matches
|
||||||
|
the expected size based on the provided start and end indices and the sample rate.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
start = 4
|
||||||
|
end = 7
|
||||||
|
trimmed_waveform = probe_audio_processor.cut(start, end)
|
||||||
|
expected_size = int((end - start) * TEST_SR)
|
||||||
|
real_size = trimmed_waveform.size(0)
|
||||||
|
assert real_size == expected_size
|
||||||
|
# assert AudioProcessor(TEST_WAVEFORM, TEST_SR).cut(start, end).size() == int((end - start) * TEST_SR)
|
||||||
|
|
||||||
|
|
||||||
|
def test_audio_processor_invalid_sr():
|
||||||
|
"""Test the behavior of AudioProcessor when an invalid smaple rate is provided.
|
||||||
|
|
||||||
|
This test verifies that the AudioProcessor constructor raises a ValueError when an invalid sample rate is provided. It uses the pytest.raises context manager to check if the ValueError is raised when initializing an
|
||||||
|
AudioProcessor object with an invalid sample rate.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
AudioProcessor(TEST_WAVEFORM, [44100, 48000])
|
||||||
|
|
||||||
|
|
||||||
|
def test_audio_processor_SAMPLE_RATE():
|
||||||
|
"""Test the default sample rate of the AudioProcessor class.
|
||||||
|
|
||||||
|
This test verifies that the default sample rate of the AudioProcessor class matches the expected value defined by the constant SAMPLE_RATE. It instantiates an AudioProcessor object with a test waveform
|
||||||
|
and checks whether the sample rate attribute (sr) of the AudioProcessor object equals the predefined constant SAMPLE_RATE.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
probe_audio_processor = AudioProcessor(TEST_WAVEFORM)
|
||||||
|
assert probe_audio_processor.sr == SAMPLE_RATE
|
||||||
@@ -19,19 +19,19 @@ def test_scraibe_init(create_scraibe_instance):
|
|||||||
|
|
||||||
def test_scraibe_autotranscribe(create_scraibe_instance):
|
def test_scraibe_autotranscribe(create_scraibe_instance):
|
||||||
model = create_scraibe_instance
|
model = create_scraibe_instance
|
||||||
transcript = model.autotranscribe('./audio_test_2.mp4')
|
transcript = model.autotranscribe('tests/audio_test_2.mp4')
|
||||||
assert isinstance(transcript, Transcript)
|
assert isinstance(transcript, Transcript)
|
||||||
|
|
||||||
|
|
||||||
def test_scraibe_diarization(create_scraibe_instance):
|
def test_scraibe_diarization(create_scraibe_instance):
|
||||||
model = create_scraibe_instance
|
model = create_scraibe_instance
|
||||||
diarisation_result = model.diarization('./audio_test_2.mp4')
|
diarisation_result = model.diarization('tests/audio_test_2.mp4')
|
||||||
assert isinstance(diarisation_result, dict)
|
assert isinstance(diarisation_result, dict)
|
||||||
|
|
||||||
|
|
||||||
def test_scraibe_transcribe(create_scraibe_instance):
|
def test_scraibe_transcribe(create_scraibe_instance):
|
||||||
model = create_scraibe_instance
|
model = create_scraibe_instance
|
||||||
transcription_result = model.transcribe('./audio_test_2.mp4')
|
transcription_result = model.transcribe('tests/audio_test_2.mp4')
|
||||||
assert isinstance(transcription_result, str)
|
assert isinstance(transcription_result, str)
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
import pytest
|
||||||
|
from scraibe import Diariser
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def diariser_instance():
|
||||||
|
"""Fixture for creating an instance of the Diariser class with mocked token.
|
||||||
|
|
||||||
|
This fixture is used to create an instance of the the Diariser class with a mocked token returned by the _get_token method. It patches the _get_token method of the Diariser class
|
||||||
|
using unit.test.mock.patch.object, ensuring that it returns a predetrmined value ('personal Hugging-Face token'). The mocked Diariser object is retunrned and can be used as a dependency in otehr tests.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Diariser(Obj): An instance of the Diariser class with a mocked token.
|
||||||
|
"""
|
||||||
|
# with mock.patch.object(Diariser, '_get_token', return_value = 'HF_TOKEN' ):
|
||||||
|
return Diariser('pyannote')
|
||||||
|
|
||||||
|
|
||||||
|
def test_Diariser_init(diariser_instance):
|
||||||
|
"""Test the initialization of the Diariser class.
|
||||||
|
|
||||||
|
This test verifies that the Diariser class is correctly initialized with the specified model.
|
||||||
|
It checks whether the 'model' attribute of the instantiated Diariser object equals 'pyannote'.
|
||||||
|
|
||||||
|
|
||||||
|
Args:
|
||||||
|
diariser_instance (obj): instance of the Diariser class
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
assert diariser_instance.model == 'pyannote'
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
import pytest
|
||||||
|
from scraibe import (Transcriber, WhisperTranscriber,
|
||||||
|
FasterWhisperTranscriber, load_transcriber)
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
|
TEST_WAVEFORM = "Hello World"
|
||||||
|
|
||||||
|
"""
|
||||||
|
@pytest.mark.parametrize("audio_file, expected_transcription",[("path_to_test_audiofile", "test_transcription")] )
|
||||||
|
@patch("scraibe.Transcriber.load_model")
|
||||||
|
|
||||||
|
def test_transcriber(mock_load_model, audio_file, expected_transcription):
|
||||||
|
|
||||||
|
|
||||||
|
Args:
|
||||||
|
mock_load_model (_type_): _description_
|
||||||
|
audio_file (_type_): _description_
|
||||||
|
expected_transcription (_type_): _description_
|
||||||
|
|
||||||
|
mock_model = mock_load_model.return_value
|
||||||
|
mock_model.transcribe.return_value ={"text": expected_transcription}
|
||||||
|
|
||||||
|
transcriber = Transcriber.load_model(model="medium")
|
||||||
|
|
||||||
|
transcription_result = transcriber.transcribe(audio=audio_file)
|
||||||
|
|
||||||
|
assert transcription_result == expected_transcription """
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def whisper_instance():
|
||||||
|
return load_transcriber('tiny', whisper_type='whisper')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def faster_whisper_instance():
|
||||||
|
return load_transcriber('tiny', whisper_type='faster-whisper')
|
||||||
|
|
||||||
|
|
||||||
|
def test_whisper_base_initialization(whisper_instance):
|
||||||
|
assert isinstance(whisper_instance, Transcriber)
|
||||||
|
|
||||||
|
|
||||||
|
def test_faster_whisper_base_initialization(faster_whisper_instance):
|
||||||
|
assert isinstance(faster_whisper_instance, Transcriber)
|
||||||
|
|
||||||
|
|
||||||
|
def test_whisper_transcriber_initialization(whisper_instance):
|
||||||
|
assert isinstance(whisper_instance, WhisperTranscriber)
|
||||||
|
|
||||||
|
|
||||||
|
def test_faster_whisper_transcriber_initialization(faster_whisper_instance):
|
||||||
|
assert isinstance(faster_whisper_instance, FasterWhisperTranscriber)
|
||||||
|
|
||||||
|
|
||||||
|
def test_wrong_transcriber_initialization():
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
load_transcriber('tiny', whisper_type='wrong_whisper')
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_whisper_kwargs():
|
||||||
|
kwargs = {"arg1": 1, "arg3": 3}
|
||||||
|
valid_kwargs = Transcriber._get_whisper_kwargs(**kwargs)
|
||||||
|
assert not valid_kwargs == {"arg1": 1, "arg3": 3}
|
||||||
|
|
||||||
|
|
||||||
|
def test_whisper_transcribe(whisper_instance):
|
||||||
|
model = whisper_instance
|
||||||
|
# mocker.patch.object(transcriber_instance.model, 'transcribe', return_value={'Hello, World !'} )
|
||||||
|
transcript = model.transcribe('tests/audio_test_2.mp4')
|
||||||
|
assert isinstance(transcript, str)
|
||||||
|
|
||||||
|
|
||||||
|
def test_faster_whisper_transcribe(faster_whisper_instance):
|
||||||
|
model = faster_whisper_instance
|
||||||
|
# mocker.patch.object(transcriber_instance.model, 'transcribe', return_value={'Hello, World !'} )
|
||||||
|
transcript = model.transcribe('tests/audio_test_2.mp4')
|
||||||
|
assert isinstance(transcript, str)
|
||||||
Reference in New Issue
Block a user