change class name from AutoTranscribe to Scraibe

2023-09-22 13:32:55 +02:00
parent bc1dd9d541
commit 1707777c64
6 changed files with 41 additions and 65 deletions
@@ -3,7 +3,7 @@ Gradio Audio Transcription App.
 --------------------------------
 This module provides an interface to transcribe audio files using the 
-AutoTranscribe model. Users can either upload an audio file or record their speech 
+Scraibe model. Users can either upload an audio file or record their speech 
 live for transcription. The application supports multiple languages and provides 
 options to specify the number of speakers and the language of the audio.
@@ -20,7 +20,7 @@ Gradio Audio Transcription App.
 --------------------------------
 This module provides an interface to transcribe audio files using the 
-AutoTranscribe model. Users can either upload an audio file or record their speech 
+Scraibe model. Users can either upload an audio file or record their speech 
 live for transcription. The application supports multiple languages and provides 
 options to specify the number of speakers and the language of the audio.
@@ -37,7 +37,7 @@ import json
 import gradio as gr
 from tqdm import tqdm
-from scraibe import AutoTranscribe, Transcript
+from scraibe import Scraibe, Transcript
 theme = gr.themes.Soft(
    primary_hue="green",
@@ -65,12 +65,12 @@ class GradioTranscriptionInterface:
    Interface handling the interaction between Gradio UI and the Audio Transcription system.
    """
-    def __init__(self, model: AutoTranscribe):
+    def __init__(self, model: Scraibe):
        """
        Initializes the GradioTranscriptionInterface with a transcription model.
        Args:
-            model (AutoTranscribe): Model responsible for audio transcription tasks.
+            model (Scraibe): Model responsible for audio transcription tasks.
        """
        self.model = model
@@ -79,7 +79,7 @@ class GradioTranscriptionInterface:
                        translation : bool,
                        language : str):
        """
-        Shortcut method for the AutoTranscribe task.
+        Shortcut method for the Scraibe task.
        Returns:
            tuple: Transcribed text (str), JSON output (dict)
@@ -209,10 +209,10 @@ class GradioTranscriptionInterface:
 # Gradio Interface
 ####
-def gradio_Interface(model : AutoTranscribe = None):
+def gradio_Interface(model : Scraibe = None):
    if model is None:
-        model = AutoTranscribe()
+        model = Scraibe()
    pipe = GradioTranscriptionInterface(model)
@@ -1,5 +1,5 @@
 """
-AutoTranscribe Class
+Scraibe Class
 --------------------
 This class serves as the core of the transcription system, responsible for handling
@@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo
 integration into various applications, ranging from transcription services to voice assistants.
 Available Classes:
- AutoTranscribe: Main class for performing transcription and diarization.
+- Scraibe: Main class for performing transcription and diarization.
                  Includes methods for loading models, processing audio files,
                  and formatting the transcription output.
 Usage:
-    from .autotranscribe import AutoTranscribe
+    from scraibe import Scraibe
-    model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model")
+    model = Scraibe()
-    transcript = model.transcribe("path/to/audiofile.wav")
+    transcript = model.autotranscribe("path/to/audiofile.wav")
 """
 # Standard Library Imports
@@ -45,9 +45,9 @@ from .transcript_exporter import Transcript
 DiarisationType = TypeVar('DiarisationType')
-class AutoTranscribe:
+class Scraibe:
    """
-    AutoTranscribe is a class responsible for managing the transcription and diarization of audio files.
+    Scraibe is a class responsible for managing the transcription and diarization of audio files.
    It serves as the core of the transcription system, incorporating pretrained models
    for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
    allowing for comprehensive audio processing.
@@ -57,7 +57,7 @@ class AutoTranscribe:
        diariser (Diariser): The diariser object to handle diarization.
    Methods:
-        __init__: Initializes the AutoTranscribe class with appropriate models.
+        __init__: Initializes the Scraibe class with appropriate models.
        transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
        remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
        get_audio_file: Gets an audio file as an AudioProcessor object.
@@ -66,7 +66,7 @@ class AutoTranscribe:
                whisper_model: Union[bool, str, whisper] = None,
                dia_model : Union[bool, str, DiarisationType] = None,
                **kwargs) -> None:
-        """Initializes the AutoTranscribe class.
+        """Initializes the Scraibe class.
        Args:
            whisper_model (Union[bool, str, whisper], optional): 
@@ -92,7 +92,11 @@ class AutoTranscribe:
        else:
            self.diariser = dia_model
-        print("AutoTranscribe initialized all models successfully loaded.")
+        if kwargs.get("verbose"):
            print("Scraibe initialized all models successfully loaded.")
            self.verbose = True
        else:
            self.verbose = False
    def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
                   remove_original : bool = False,
@@ -112,7 +116,8 @@ class AutoTranscribe:
            Transcript: A Transcript object containing the transcription,
                        which can be exported to different formats.
        """
-        
+        if kwargs.get("verbose"):
            self.verbose = kwargs.get("verbose")
        # Get audio file as an AudioProcessor object
        audio_file = self.get_audio_file(audio_file)
@@ -121,8 +126,9 @@ class AutoTranscribe:
            "waveform" : audio_file.waveform.reshape(1,len(audio_file.waveform)), 
            "sample_rate": audio_file.sr
            }
-       
+
-        print("Starting diarisation.")
+        if self.verbose:
            print("Starting diarisation.")
        diarisation = self.diariser.diarization(dia_audio, **kwargs)
@@ -137,14 +143,15 @@ class AutoTranscribe:
            return Transcript(final_transcript)
-        print("Diarisation finished. Starting transcription.")
+        if self.verbose:
            print("Diarisation finished. Starting transcription.")
        audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
        # Transcribe each segment and store the results
        final_transcript = dict()
-        for i in trange(len(diarisation["segments"]), desc= "Transcribing"):
+        for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose):
            seg = diarisation["segments"][i]
@@ -280,4 +287,4 @@ class AutoTranscribe:
        return audio_file
    def __repr__(self):
-        return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})"
+        return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})"
@@ -1,5 +1,5 @@
 """
-Command-Line Interface (CLI) for the AutoTranscribe class,
+Command-Line Interface (CLI) for the Scraibe class,
 allowing for user interaction to transcribe and diarize audio files. 
 The function includes arguments for specifying the audio files, model paths,
 output formats, and other options necessary for transcription.
@@ -8,9 +8,7 @@ import os
 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 import json
-from sympy import use
+from .autotranscript import Scraibe
 from .autotranscript import AutoTranscribe
 from .app.gradio_app import gradio_Interface
 from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
@@ -20,12 +18,12 @@ from torch import set_num_threads
 def cli():
    """
-    Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe 
+    Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe 
    and diarize audio files. The function includes arguments for specifying the audio files, model paths, 
    output formats, and other options necessary for transcription.
    This function can be executed from the command line to perform transcription tasks, providing a 
-    user-friendly way to access the AutoTranscribe class functionalities.
+    user-friendly way to access the Scraibe class functionalities.
    """
    def str2bool(string):
@@ -115,7 +113,7 @@ def cli():
    if arg_dict["whisper_model_directory"]:
        class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
-    model = AutoTranscribe(**class_kwargs)
+    model = Scraibe(**class_kwargs)
    if arg_dict["audio_files"]:
@@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper")
 PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
 PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
 def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
    """Configure diarization pipeline from a YAML file.
@@ -90,8 +90,8 @@ class Transcriber:
        kwargs = self._get_whisper_kwargs(**kwargs)
-        if "verbose" not in kwargs:
+        if not kwargs.get("verbose"):
-            kwargs["verbose"] = False    
+            kwargs["verbose"] = None 
        result = self.model.transcribe(audio, *args, **kwargs)
        return result["text"]
@@ -1,36 +1,8 @@
 # import os
 # import sys
 # import traceback
-# class TracePrints(object):
+from scraibe import Scraibe
-#   def __init__(self):    
+model = Scraibe()
 #     self.stdout = sys.stdout
 #   def write(self, s):
 #     self.stdout.write("Writing %r\n" % s)
 #     traceback.print_stack(file=self.stdout)
-# sys.stdout = TracePrints()
+text = model.autotranscocribe('kida.mp4', num_speakers=2)
 # os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote")
 # import os
 # os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
 # os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
 from scraibe import AutoTranscribe
 model = AutoTranscribe()
 text = model.autotranscribe('kida.mp4', num_speakers=2)
 print("Transcription:\n")
 print(text)
 # from autotranscript.misc import *
 # import os
 # print(os.path.exists(CACHE_DIR))
 # print(os.path.exists(WHISPER_DEFAULT_PATH))
 # print(os.path.exists(PYANNOTE_DEFAULT_PATH))
 # print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))