From 1707777c64bfad86806f72ded8aa49dd52c7d32a Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 22 Sep 2023 13:32:55 +0200 Subject: [PATCH] change class name from AutoTranscribe to Scraibe --- scraibe/app/gradio_app.py | 16 ++++++++-------- scraibe/autotranscript.py | 39 +++++++++++++++++++++++---------------- scraibe/cli.py | 12 +++++------- scraibe/misc.py | 1 - scraibe/transcriber.py | 4 ++-- transcribe.py | 34 +++------------------------------- 6 files changed, 41 insertions(+), 65 deletions(-) diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py index 6d70097..f5126c7 100644 --- a/scraibe/app/gradio_app.py +++ b/scraibe/app/gradio_app.py @@ -3,7 +3,7 @@ Gradio Audio Transcription App. -------------------------------- This module provides an interface to transcribe audio files using the -AutoTranscribe model. Users can either upload an audio file or record their speech +Scraibe model. Users can either upload an audio file or record their speech live for transcription. The application supports multiple languages and provides options to specify the number of speakers and the language of the audio. @@ -20,7 +20,7 @@ Gradio Audio Transcription App. -------------------------------- This module provides an interface to transcribe audio files using the -AutoTranscribe model. Users can either upload an audio file or record their speech +Scraibe model. Users can either upload an audio file or record their speech live for transcription. The application supports multiple languages and provides options to specify the number of speakers and the language of the audio. @@ -37,7 +37,7 @@ import json import gradio as gr from tqdm import tqdm -from scraibe import AutoTranscribe, Transcript +from scraibe import Scraibe, Transcript theme = gr.themes.Soft( primary_hue="green", @@ -65,12 +65,12 @@ class GradioTranscriptionInterface: Interface handling the interaction between Gradio UI and the Audio Transcription system. 
""" - def __init__(self, model: AutoTranscribe): + def __init__(self, model: Scraibe): """ Initializes the GradioTranscriptionInterface with a transcription model. Args: - model (AutoTranscribe): Model responsible for audio transcription tasks. + model (Scraibe): Model responsible for audio transcription tasks. """ self.model = model @@ -79,7 +79,7 @@ class GradioTranscriptionInterface: translation : bool, language : str): """ - Shortcut method for the AutoTranscribe task. + Shortcut method for the Scraibe task. Returns: tuple: Transcribed text (str), JSON output (dict) @@ -209,10 +209,10 @@ class GradioTranscriptionInterface: # Gradio Interface #### -def gradio_Interface(model : AutoTranscribe = None): +def gradio_Interface(model : Scraibe = None): if model is None: - model = AutoTranscribe() + model = Scraibe() pipe = GradioTranscriptionInterface(model) diff --git a/scraibe/autotranscript.py b/scraibe/autotranscript.py index f588e42..b3545e4 100644 --- a/scraibe/autotranscript.py +++ b/scraibe/autotranscript.py @@ -1,5 +1,5 @@ """ -AutoTranscribe Class +Scraibe Class -------------------- This class serves as the core of the transcription system, responsible for handling @@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo integration into various applications, ranging from transcription services to voice assistants. Available Classes: -- AutoTranscribe: Main class for performing transcription and diarization. +- Scraibe: Main class for performing transcription and diarization. Includes methods for loading models, processing audio files, and formatting the transcription output. 
Usage: - from .autotranscribe import AutoTranscribe + from scraibe import Scraibe - model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model") - transcript = model.transcribe("path/to/audiofile.wav") + model = Scraibe() + transcript = model.autotranscribe("path/to/audiofile.wav") """ # Standard Library Imports @@ -45,9 +45,9 @@ from .transcript_exporter import Transcript DiarisationType = TypeVar('DiarisationType') -class AutoTranscribe: +class Scraibe: """ - AutoTranscribe is a class responsible for managing the transcription and diarization of audio files. + Scraibe is a class responsible for managing the transcription and diarization of audio files. It serves as the core of the transcription system, incorporating pretrained models for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio), allowing for comprehensive audio processing. @@ -57,7 +57,7 @@ class AutoTranscribe: diariser (Diariser): The diariser object to handle diarization. Methods: - __init__: Initializes the AutoTranscribe class with appropriate models. + __init__: Initializes the Scraibe class with appropriate models. transcribe: Transcribes an audio file using the whisper model and pyannote diarization model. remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy. get_audio_file: Gets an audio file as an AudioProcessor object. @@ -66,7 +66,7 @@ class AutoTranscribe: whisper_model: Union[bool, str, whisper] = None, dia_model : Union[bool, str, DiarisationType] = None, **kwargs) -> None: - """Initializes the AutoTranscribe class. + """Initializes the Scraibe class. 
Args: whisper_model (Union[bool, str, whisper], optional): @@ -92,7 +92,11 @@ class AutoTranscribe: else: self.diariser = dia_model - print("AutoTranscribe initialized all models successfully loaded.") + if kwargs.get("verbose"): + print("Scraibe initialized all models successfully loaded.") + self.verbose = True + else: + self.verbose = False def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray], remove_original : bool = False, @@ -112,7 +116,8 @@ class AutoTranscribe: Transcript: A Transcript object containing the transcription, which can be exported to different formats. """ - + if kwargs.get("verbose"): + self.verbose = kwargs.get("verbose") # Get audio file as an AudioProcessor object audio_file = self.get_audio_file(audio_file) @@ -121,8 +126,9 @@ class AutoTranscribe: "waveform" : audio_file.waveform.reshape(1,len(audio_file.waveform)), "sample_rate": audio_file.sr } - - print("Starting diarisation.") + + if self.verbose: + print("Starting diarisation.") diarisation = self.diariser.diarization(dia_audio, **kwargs) @@ -137,14 +143,15 @@ class AutoTranscribe: return Transcript(final_transcript) - print("Diarisation finished. Starting transcription.") + if self.verbose: + print("Diarisation finished. 
Starting transcription.") audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device) # Transcribe each segment and store the results final_transcript = dict() - for i in trange(len(diarisation["segments"]), desc= "Transcribing"): + for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose): seg = diarisation["segments"][i] @@ -280,4 +287,4 @@ class AutoTranscribe: return audio_file def __repr__(self): - return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})" + return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})" diff --git a/scraibe/cli.py b/scraibe/cli.py index b9da56d..b05da92 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -1,5 +1,5 @@ """ -Command-Line Interface (CLI) for the AutoTranscribe class, +Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe and diarize audio files. The function includes arguments for specifying the audio files, model paths, output formats, and other options necessary for transcription. @@ -8,9 +8,7 @@ import os from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import json -from sympy import use - -from .autotranscript import AutoTranscribe +from .autotranscript import Scraibe from .app.gradio_app import gradio_Interface from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE @@ -20,12 +18,12 @@ from torch import set_num_threads def cli(): """ - Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe + Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe and diarize audio files. The function includes arguments for specifying the audio files, model paths, output formats, and other options necessary for transcription. 
This function can be executed from the command line to perform transcription tasks, providing a - user-friendly way to access the AutoTranscribe class functionalities. + user-friendly way to access the Scraibe class functionalities. """ def str2bool(string): @@ -115,7 +113,7 @@ def cli(): if arg_dict["whisper_model_directory"]: class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory") - model = AutoTranscribe(**class_kwargs) + model = Scraibe(**class_kwargs) if arg_dict["audio_files"]: diff --git a/scraibe/misc.py b/scraibe/misc.py index 399fcbb..b1afeea 100644 --- a/scraibe/misc.py +++ b/scraibe/misc.py @@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper") PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote") PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") - def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None: """Configure diarization pipeline from a YAML file. diff --git a/scraibe/transcriber.py b/scraibe/transcriber.py index d4c6344..dbb290e 100644 --- a/scraibe/transcriber.py +++ b/scraibe/transcriber.py @@ -90,8 +90,8 @@ class Transcriber: kwargs = self._get_whisper_kwargs(**kwargs) - if "verbose" not in kwargs: - kwargs["verbose"] = False + if not kwargs.get("verbose"): + kwargs["verbose"] = None result = self.model.transcribe(audio, *args, **kwargs) return result["text"] diff --git a/transcribe.py b/transcribe.py index 094dcfe..5a22ff3 100644 --- a/transcribe.py +++ b/transcribe.py @@ -1,36 +1,8 @@ -# import os -# import sys -# import traceback -# class TracePrints(object): -# def __init__(self): -# self.stdout = sys.stdout -# def write(self, s): -# self.stdout.write("Writing %r\n" % s) -# traceback.print_stack(file=self.stdout) +from scraibe import Scraibe +model = Scraibe() -# sys.stdout = TracePrints() - -# os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote") -# import os - -# 
os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models") -# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models") - - -from scraibe import AutoTranscribe -model = AutoTranscribe() - -text = model.autotranscribe('kida.mp4', num_speakers=2) +text = model.autotranscribe('kida.mp4', num_speakers=2) print("Transcription:\n") print(text) - -# from autotranscript.misc import * -# import os - -# print(os.path.exists(CACHE_DIR)) -# print(os.path.exists(WHISPER_DEFAULT_PATH)) -# print(os.path.exists(PYANNOTE_DEFAULT_PATH)) - -# print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))