Merge pull request #33 from JSchmie/tests

Good Job :)
2024-04-29 23:05:20 +02:00
parent 91623ac265 ba2eac6c5c
commit fee9f0b468
8 changed files with 327 additions and 120 deletions
@@ -0,0 +1,43 @@
+name: Run tests
+
+on:
+ #push:
+  
+  pull_request:
+    branches: ['main', 'develop']
+
+  workflow_dispatch:
+
+jobs:
+  pytest:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: 3.9
+
+      - name: Install Dependencies
+        run: |
+
+             sudo apt update && sudo apt upgrade
+             python -m pip install --upgrade pip
+             pip install -r requirements.txt
+             pip install .
+             sudo apt-get install libsndfile1-dev 
+             sudo apt-get install ffmpeg 
+             pip install pytest
+
+
+           
+
+      - name: Run pytest
+        env: 
+          HF_TOKEN : ${{ secrets.HF_TOKEN }}
+        run: |
+            pytest 
+           
@@ -0,0 +1,127 @@
+import pytest
+from scraibe.audio import AudioProcessor
+import torch
+
+
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+TEST_WAVEFORM = torch.sin(torch.randn(160000)).to(DEVICE)
+TEST_SR = 16000
+SAMPLE_RATE = 16000
+NORMALIZATION_FACTOR = 32768
+
+
+@pytest.fixture
+def probe_audio_processor():
+    """Fixture for creating an instance of the AudioProcessor class with test waveform and sample rate. 
+    
+    This fixture is used to create an instance of the AudioProcessor class with a predfined test waveform and sample rate (TEST_SR). It returns the instantiated AudioProcessor , which can bes used as a 
+    dependency in other test functions.
+
+
+    Returns:
+        AudioProcessor (obj): An instance of the AudioProcessor class with the test waveform and sample rate.
+    """    
+    return AudioProcessor(TEST_WAVEFORM, TEST_SR)
+
+
+
+
+
+
+def test_AudioProcessor_init(probe_audio_processor):
+    """
+    Test the initialization of the AudioProcessor class.
+
+    This test verifies that the AUdioProcessor class is correctly initialized with the provided waveform and sample rate. It checks whether the instantiated AhdioProcessor object has the correct attributes
+    and whether the waveform and sample rate match the expected values.
+
+    Args:
+        probe_audio_processor (obj): An instance of the AudioProcessor class to be tested.
+
+
+    Returns:
+           None
+
+    
+
+        
+    """    
+    assert isinstance(probe_audio_processor, AudioProcessor)
+    assert probe_audio_processor.waveform.device == TEST_WAVEFORM.device
+    assert torch.equal(probe_audio_processor.waveform, TEST_WAVEFORM)
+    assert probe_audio_processor.sr == TEST_SR
+
+
+
+def test_cut(probe_audio_processor):
+    """Test the cut function of the AudioProcessor class.
+    
+    This test verifies that the cut function correctly extracts a segment of audio data from
+     the waveform, given start and end indices. It checks whether the size of the extracted segment matches
+     the expected size based on the provided start and end indices and the sample rate.
+
+     Returns:
+            None
+
+
+    """    
+  
+    start = 4
+    end = 7
+    trimmed_waveform = probe_audio_processor.cut(start, end)
+    expected_size = int((end - start) * TEST_SR)
+    real_size = trimmed_waveform.size(0)
+    assert real_size == expected_size
+    #assert AudioProcessor(TEST_WAVEFORM, TEST_SR).cut(start, end).size() == int((end - start) * TEST_SR)
+
+
+
+
+   
+
+
+
+
+
+def test_audio_processor_invalid_sr():
+    """Test the behavior of AudioProcessor when an invalid smaple rate is provided.
+    
+    This test verifies that the AudioProcessor constructor raises a ValueError when an invalid sample rate is provided. It uses the pytest.raises context manager to check if the ValueError is raised when initializing an 
+    AudioProcessor object with an invalid sample rate.
+
+    Returns:
+           None
+    """    
+    with pytest.raises(ValueError):
+        AudioProcessor(TEST_WAVEFORM, [44100,48000])
+
+
+def test_audio_processor_SAMPLE_RATE():
+    """Test the default sample rate of the AudioProcessor class.
+    
+    This test verifies that the default sample rate of the AudioProcessor class matches the expected value defined by the constant SAMPLE_RATE. It instantiates an AudioProcessor object with a test waveform
+    and checks whether the sample rate attribute (sr) of the AudioProcessor object equals the predefined constant SAMPLE_RATE.
+
+    Returns:
+           None
+    """    
+    probe_audio_processor = AudioProcessor(TEST_WAVEFORM)
+    assert probe_audio_processor.sr == SAMPLE_RATE       
+
+
+
+
+
+
+
+
+
+
+
+  
+
+
+    
+
+
@@ -0,0 +1,58 @@
+import pytest
+from scraibe import Scraibe, Diariser, Transcriber, Transcript
+from unittest.mock import MagicMock, patch
+import os
+
+
+
+
+
+@pytest.fixture
+def create_scraibe_instance():
+    if "HF_TOKEN" in os.environ:
+        return Scraibe(use_auth_token=os.environ["HF_TOKEN"] )
+    else:
+        return Scraibe()
+    
+
+
+
+def test_scraibe_init(create_scraibe_instance):
+    model = create_scraibe_instance
+    assert isinstance(model.transcriber, Transcriber)
+    assert isinstance(model.diariser, Diariser)
+
+
+def test_scraibe_autotranscribe(create_scraibe_instance):
+    model = create_scraibe_instance
+    transcript = model.autotranscribe('test/audio_test_2.mp4')
+    assert isinstance(transcript, Transcript)
+
+
+def test_scraibe_diarization(create_scraibe_instance):
+    model = create_scraibe_instance
+    diarisation_result = model.diarization('test/audio_test_2.mp4')
+    assert isinstance(diarisation_result, dict)
+
+
+def test_scraibe_transcribe(create_scraibe_instance):
+    model = create_scraibe_instance
+    transcription_result = model.transcribe('test/audio_test_2.mp4')
+    assert isinstance(transcription_result, str)
+
+
+""" def test_remove_audio_file(create_scraibe_instance):
+    model = create_scraibe_instance
+    with pytest.raises(ValueError):
+        model.remove_audio_file("non_existing_audio_file")
+
+    model.remove_audio_file("audio_test_2.mp4")
+    assert not os.path.exists("audio_test_2.mp4")   """    
+
+
+""" def test_get_audio_file(create_scraibe_instance):
+    model = create_scraibe_instance
+    audio_file = os.path.exist("audio_test_2.mp4")
+    assert isinstance(audio_file, AudioProcessor)
+    assert isinstance(audio_file.waveform, torch.Tensor)
+    assert isinstance(audio_file.sr, torch.Tensor)   """
@@ -0,0 +1,47 @@
+import pytest
+import os
+from unittest import mock
+from scraibe import diarisation, Diariser
+
+
+
+@pytest.fixture
+def diariser_instance():
+    """Fixture for creating an instance of the Diariser class with mocked token.
+
+    This fixture is used to create an instance of the the Diariser class with a mocked token returned by the _get_token method. It patches the _get_token method of the Diariser class
+    using unit.test.mock.patch.object, ensuring that it returns a predetrmined value ('personal Hugging-Face token'). The mocked Diariser object is retunrned and can be used as a dependency in otehr tests.
+
+    Returns:
+        Diariser(Obj): An instance of the Diariser class with a mocked token.
+    """
+    #with mock.patch.object(Diariser, '_get_token', return_value = 'HF_TOKEN' ):
+    return Diariser('pyannote')
+
+
+
+def test_Diariser_init(diariser_instance):
+    """Test the initialization of the Diariser class.
+
+    This test verifies that the Diariser class is correctly initialized with the specified model.
+    It checks whether the 'model' attribute of the instantiated Diariser object equals 'pyannote'.
+
+
+    Args:
+        diariser_instance (obj): instance of the Diariser class
+
+    Returns: 
+           None
+    """    
+    assert diariser_instance.model == 'pyannote'
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,52 @@
+import pytest
+from unittest.mock import patch
+from scraibe import Transcriber
+import torch
+
+
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+TEST_WAVEFORM = "Hello World"
+
+""" 
+@pytest.mark.parametrize("audio_file, expected_transcription",[("path_to_test_audiofile", "test_transcription")] )
+@patch("scraibe.Transcriber.load_model")
+
+def test_transcriber(mock_load_model, audio_file, expected_transcription):
+    
+
+    Args:
+        mock_load_model (_type_): _description_
+        audio_file (_type_): _description_
+        expected_transcription (_type_): _description_
+       
+    mock_model = mock_load_model.return_value
+    mock_model.transcribe.return_value ={"text": expected_transcription} 
+
+    transcriber = Transcriber.load_model(model="medium")
+
+    transcription_result = transcriber.transcribe(audio=audio_file)
+
+    assert transcription_result == expected_transcription """
+
+@pytest.fixture
+def transcriber_instance():
+    return Transcriber.load_model('medium')
+
+def test_transcriber_initialization(transcriber_instance):
+    assert isinstance(transcriber_instance, Transcriber)
+
+def test_get_whisper_kwargs():
+    kwargs = {"arg1": 1, "arg3": 3} 
+    valid_kwargs = Transcriber._get_whisper_kwargs(**kwargs)
+    assert not valid_kwargs == {"arg1": 1, "arg3": 3}  
+
+
+def test_transcribe(transcriber_instance):
+    model = transcriber_instance
+    #mocker.patch.object(transcriber_instance.model, 'transcribe', return_value={'Hello, World !'} )
+    transcript = model.transcribe('test/audio_test_2.mp4')
+    assert isinstance(transcript, str)
+
+
+
@@ -1,120 +0,0 @@
-import pytest
-from scraibe import Transcriber
-from unittest.mock import patch, mock_open
-import os
-
-def test_load_pyannote_model():
-    """
-    Test load_pyannote_test
-    """
-    from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
-    from pyannote.audio import Pipeline
-
-    pipeline = Pipeline.from_pretrained("models/pyannote/speaker_diarization/config.yaml")
-    assert isinstance(pipeline, SpeakerDiarization)
-
-# Test Transcribtion class
-
-
-@pytest.fixture
-def transcriber():
-    """
-    Prepare Transcriber for testing
-    Returns: Transcriber Object
-    """
-
-    return Transcriber.load_model("medium", local=True)
-
-
-def test_Transcriber_init(transcriber):
-    """
-    Test Transcriber initialization with a whisper model 
-    """
-    
-    assert isinstance(transcriber, Transcriber)
-
-def test_transcription(transcriber):
-    """
-    Test transcription
-    """
-
-    transcript = transcriber.transcribe("tests/test.wav") 
-    assert isinstance(transcript, str)
-    
-def test_save_transcript_to_file(transcriber):
-    """
-    Test save_transcript_to_file
-    """
-    transcript = transcriber.transcribe("tests/test.wav")
-
-    Transcriber.save_transcript(transcript, "tests/output.txt")
-    
-    assert os.path.exists("tests/output.txt")
-
-    os.remove("tests/output.txt")
-    
-# Test Diaraization class
-
-from scraibe import Diariser
-
-@pytest.fixture
-def diarisation():
-    """
-    Prepare Diarisation for testing
-    Returns: Diarisation Object
-    """
-
-    return Diariser.load_model("models/pyannote/speaker_diarization/config.yaml", local=True)
-
-def test_Diarisation_init(diarisation):
-    """
-    Test Diarisation initialization with a pyannote model 
-    """
-    
-    assert isinstance(diarisation, Diariser)
-
-def test_diarisation(diarisation):
-    """
-    Test diarisation
-    """
-
-    diarisation = diarisation.diarization("tests/test.wav") 
-    assert isinstance(diarisation, dict)
-
-# Test AudioProcessor
-
-from scraibe import AudioProcessor , TorchAudioProcessor
-
-
-def test_AudioProcessor_init():
-    """
-    Test AudioProcessor initialization
-    """
-    audio = AudioProcessor("tests/test.wav")
-    assert isinstance(audio, AudioProcessor)
-
-def test_AudioProcessor_convert():
-    """
-    Test AudioProcessor convert
-    """
-    audio = AudioProcessor("tests/test.wav")
-    audio.convert_audio("tests/test.mp3", format="mp3")
-    assert os.path.exists("tests/test.mp3")
-    
-def test_TorchAudioProcessor_from_file():
-    """
-    Test TorchAudioProcessor initialization
-    """
-    audio = TorchAudioProcessor.from_file("tests/test.wav")
-    
-    assert isinstance(audio, TorchAudioProcessor)
-    
-    os.remove("tests/test.mp3")
-
-
-def test_TorchAudioProcessor_from_ffmpeg():
-    """
-    Test TorchAudioProcessor initialization
-    """
-    audio = TorchAudioProcessor.from_ffmpeg("tests/test.wav")
-    assert isinstance(audio, TorchAudioProcessor)