change class name from AutoTranscribe to Scraibe

This commit is contained in:
Jaikinator
2023-09-22 13:32:55 +02:00
parent bc1dd9d541
commit 1707777c64
6 changed files with 41 additions and 65 deletions
+8 -8
View File
@@ -3,7 +3,7 @@ Gradio Audio Transcription App.
--------------------------------
This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech
Scraibe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio.
@@ -20,7 +20,7 @@ Gradio Audio Transcription App.
--------------------------------
This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech
Scraibe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio.
@@ -37,7 +37,7 @@ import json
import gradio as gr
from tqdm import tqdm
from scraibe import AutoTranscribe, Transcript
from scraibe import Scraibe, Transcript
theme = gr.themes.Soft(
primary_hue="green",
@@ -65,12 +65,12 @@ class GradioTranscriptionInterface:
Interface handling the interaction between Gradio UI and the Audio Transcription system.
"""
def __init__(self, model: AutoTranscribe):
def __init__(self, model: Scraibe):
"""
Initializes the GradioTranscriptionInterface with a transcription model.
Args:
model (AutoTranscribe): Model responsible for audio transcription tasks.
model (Scraibe): Model responsible for audio transcription tasks.
"""
self.model = model
@@ -79,7 +79,7 @@ class GradioTranscriptionInterface:
translation : bool,
language : str):
"""
Shortcut method for the AutoTranscribe task.
Shortcut method for the Scraibe task.
Returns:
tuple: Transcribed text (str), JSON output (dict)
@@ -209,10 +209,10 @@ class GradioTranscriptionInterface:
# Gradio Interface
####
def gradio_Interface(model : AutoTranscribe = None):
def gradio_Interface(model : Scraibe = None):
if model is None:
model = AutoTranscribe()
model = Scraibe()
pipe = GradioTranscriptionInterface(model)
+20 -13
View File
@@ -1,5 +1,5 @@
"""
AutoTranscribe Class
Scraibe Class
--------------------
This class serves as the core of the transcription system, responsible for handling
@@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo
integration into various applications, ranging from transcription services to voice assistants.
Available Classes:
- AutoTranscribe: Main class for performing transcription and diarization.
- Scraibe: Main class for performing transcription and diarization.
Includes methods for loading models, processing audio files,
and formatting the transcription output.
Usage:
from .autotranscribe import AutoTranscribe
from scraibe import Scraibe
model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model")
transcript = model.transcribe("path/to/audiofile.wav")
model = Scraibe()
transcript = model.autotranscribe("path/to/audiofile.wav")
"""
# Standard Library Imports
@@ -45,9 +45,9 @@ from .transcript_exporter import Transcript
DiarisationType = TypeVar('DiarisationType')
class AutoTranscribe:
class Scraibe:
"""
AutoTranscribe is a class responsible for managing the transcription and diarization of audio files.
Scraibe is a class responsible for managing the transcription and diarization of audio files.
It serves as the core of the transcription system, incorporating pretrained models
for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
allowing for comprehensive audio processing.
@@ -57,7 +57,7 @@ class AutoTranscribe:
diariser (Diariser): The diariser object to handle diarization.
Methods:
__init__: Initializes the AutoTranscribe class with appropriate models.
__init__: Initializes the Scraibe class with appropriate models.
transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
get_audio_file: Gets an audio file as an AudioProcessor object.
@@ -66,7 +66,7 @@ class AutoTranscribe:
whisper_model: Union[bool, str, whisper] = None,
dia_model : Union[bool, str, DiarisationType] = None,
**kwargs) -> None:
"""Initializes the AutoTranscribe class.
"""Initializes the Scraibe class.
Args:
whisper_model (Union[bool, str, whisper], optional):
@@ -92,7 +92,11 @@ class AutoTranscribe:
else:
self.diariser = dia_model
print("AutoTranscribe initialized all models successfully loaded.")
if kwargs.get("verbose"):
print("Scraibe initialized all models successfully loaded.")
self.verbose = True
else:
self.verbose = False
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
remove_original : bool = False,
@@ -112,7 +116,8 @@ class AutoTranscribe:
Transcript: A Transcript object containing the transcription,
which can be exported to different formats.
"""
if kwargs.get("verbose"):
self.verbose = kwargs.get("verbose")
# Get audio file as an AudioProcessor object
audio_file = self.get_audio_file(audio_file)
@@ -122,6 +127,7 @@ class AutoTranscribe:
"sample_rate": audio_file.sr
}
if self.verbose:
print("Starting diarisation.")
diarisation = self.diariser.diarization(dia_audio, **kwargs)
@@ -137,6 +143,7 @@ class AutoTranscribe:
return Transcript(final_transcript)
if self.verbose:
print("Diarisation finished. Starting transcription.")
audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
@@ -144,7 +151,7 @@ class AutoTranscribe:
# Transcribe each segment and store the results
final_transcript = dict()
for i in trange(len(diarisation["segments"]), desc= "Transcribing"):
for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose):
seg = diarisation["segments"][i]
@@ -280,4 +287,4 @@ class AutoTranscribe:
return audio_file
def __repr__(self):
return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})"
return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})"
+5 -7
View File
@@ -1,5 +1,5 @@
"""
Command-Line Interface (CLI) for the AutoTranscribe class,
Command-Line Interface (CLI) for the Scraibe class,
allowing for user interaction to transcribe and diarize audio files.
The function includes arguments for specifying the audio files, model paths,
output formats, and other options necessary for transcription.
@@ -8,9 +8,7 @@ import os
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import json
from sympy import use
from .autotranscript import AutoTranscribe
from .autotranscript import Scraibe
from .app.gradio_app import gradio_Interface
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
@@ -20,12 +18,12 @@ from torch import set_num_threads
def cli():
"""
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe
Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
output formats, and other options necessary for transcription.
This function can be executed from the command line to perform transcription tasks, providing a
user-friendly way to access the AutoTranscribe class functionalities.
user-friendly way to access the Scraibe class functionalities.
"""
def str2bool(string):
@@ -115,7 +113,7 @@ def cli():
if arg_dict["whisper_model_directory"]:
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
model = AutoTranscribe(**class_kwargs)
model = Scraibe(**class_kwargs)
if arg_dict["audio_files"]:
-1
View File
@@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper")
PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
"""Configure diarization pipeline from a YAML file.
+2 -2
View File
@@ -90,8 +90,8 @@ class Transcriber:
kwargs = self._get_whisper_kwargs(**kwargs)
if "verbose" not in kwargs:
kwargs["verbose"] = False
if not kwargs.get("verbose"):
kwargs["verbose"] = None
result = self.model.transcribe(audio, *args, **kwargs)
return result["text"]
+3 -31
View File
@@ -1,36 +1,8 @@
# import os
# import sys
# import traceback
# class TracePrints(object):
# def __init__(self):
# self.stdout = sys.stdout
# def write(self, s):
# self.stdout.write("Writing %r\n" % s)
# traceback.print_stack(file=self.stdout)
from scraibe import Scraibe
model = Scraibe()
# sys.stdout = TracePrints()
# os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote")
# import os
# os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
from scraibe import AutoTranscribe
model = AutoTranscribe()
text = model.autotranscribe('kida.mp4', num_speakers=2)
text = model.autotranscocribe('kida.mp4', num_speakers=2)
print("Transcription:\n")
print(text)
# from autotranscript.misc import *
# import os
# print(os.path.exists(CACHE_DIR))
# print(os.path.exists(WHISPER_DEFAULT_PATH))
# print(os.path.exists(PYANNOTE_DEFAULT_PATH))
# print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))