From 53e57a06d70263a08467ecea9063d44738b9c0c7 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Mon, 9 Sep 2024 12:25:14 +0200 Subject: [PATCH] Added tests for faster-whisper --- test/test_transcriber.py | 18 +++++++++--------- tests/test_diarization.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 tests/test_diarization.py diff --git a/test/test_transcriber.py b/test/test_transcriber.py index 31765f6..bd1e9f5 100644 --- a/test/test_transcriber.py +++ b/test/test_transcriber.py @@ -1,6 +1,6 @@ import pytest from scraibe import (Transcriber, WhisperTranscriber, - WhisperXTranscriber, load_transcriber) + FasterWhisperTranscriber, load_transcriber) import torch @@ -35,24 +35,24 @@ def whisper_instance(): @pytest.fixture -def whisperx_instance(): - return load_transcriber('medium', whisper_type='whisperx') +def faster_whisper_instance(): + return load_transcriber('medium', whisper_type='faster-whisper') def test_whisper_base_initialization(whisper_instance): assert isinstance(whisper_instance, Transcriber) -def test_whisperx_base_initialization(whisperx_instance): - assert isinstance(whisperx_instance, Transcriber) +def test_faster_whisper_base_initialization(faster_whisper_instance): + assert isinstance(faster_whisper_instance, Transcriber) def test_whisper_transcriber_initialization(whisper_instance): assert isinstance(whisper_instance, WhisperTranscriber) -def test_whisperx_transcriber_initialization(whisperx_instance): - assert isinstance(whisperx_instance, WhisperXTranscriber) +def test_faster_whisper_transcriber_initialization(faster_whisper_instance): + assert isinstance(faster_whisper_instance, FasterWhisperTranscriber) def test_wrong_transcriber_initialization(): @@ -73,8 +73,8 @@ def test_whisper_transcribe(whisper_instance): assert isinstance(transcript, str) -def test_whisperx_transcribe(whisperx_instance): - model = whisperx_instance +def test_faster_whisper_transcribe(faster_whisper_instance): + model = faster_whisper_instance # mocker.patch.object(transcriber_instance.model, 'transcribe', return_value={'Hello, World !'} ) transcript = model.transcribe('test/audio_test_2.mp4') assert isinstance(transcript, str) diff --git a/tests/test_diarization.py b/tests/test_diarization.py new file mode 100644 index 0000000..f9e81a5 --- /dev/null +++ b/tests/test_diarization.py @@ -0,0 +1,10 @@ +from os import environ + +environ["AUTOT_CACHE"] = "/mnt/disk1/Projekte/ScrAIbe/tests" +# environ["PYANNOTE_CACHE"] = "/mnt/disk1/Projekte/ScrAIbe/tests/pyannote" +# environ["TORCH_HOME"] = "/mnt/disk1/Projekte/ScrAIbe/tests/torch" + +from scraibe import Scraibe + +scraibe = Scraibe(whisper_type = "faster-whisper", whisper_model = "tiny") +print(scraibe.autotranscribe('/mnt/disk1/Projekte/ScrAIbe/test/audio_test_1.mp4')) \ No newline at end of file