change class name from AutoTranscribe to Scraibe
This commit is contained in:
@@ -3,7 +3,7 @@ Gradio Audio Transcription App.
|
||||
--------------------------------
|
||||
|
||||
This module provides an interface to transcribe audio files using the
|
||||
AutoTranscribe model. Users can either upload an audio file or record their speech
|
||||
Scraibe model. Users can either upload an audio file or record their speech
|
||||
live for transcription. The application supports multiple languages and provides
|
||||
options to specify the number of speakers and the language of the audio.
|
||||
|
||||
@@ -20,7 +20,7 @@ Gradio Audio Transcription App.
|
||||
--------------------------------
|
||||
|
||||
This module provides an interface to transcribe audio files using the
|
||||
AutoTranscribe model. Users can either upload an audio file or record their speech
|
||||
Scraibe model. Users can either upload an audio file or record their speech
|
||||
live for transcription. The application supports multiple languages and provides
|
||||
options to specify the number of speakers and the language of the audio.
|
||||
|
||||
@@ -37,7 +37,7 @@ import json
|
||||
import gradio as gr
|
||||
from tqdm import tqdm
|
||||
|
||||
from scraibe import AutoTranscribe, Transcript
|
||||
from scraibe import Scraibe, Transcript
|
||||
|
||||
theme = gr.themes.Soft(
|
||||
primary_hue="green",
|
||||
@@ -65,12 +65,12 @@ class GradioTranscriptionInterface:
|
||||
Interface handling the interaction between Gradio UI and the Audio Transcription system.
|
||||
"""
|
||||
|
||||
def __init__(self, model: AutoTranscribe):
|
||||
def __init__(self, model: Scraibe):
|
||||
"""
|
||||
Initializes the GradioTranscriptionInterface with a transcription model.
|
||||
|
||||
Args:
|
||||
model (AutoTranscribe): Model responsible for audio transcription tasks.
|
||||
model (Scraibe): Model responsible for audio transcription tasks.
|
||||
"""
|
||||
self.model = model
|
||||
|
||||
@@ -79,7 +79,7 @@ class GradioTranscriptionInterface:
|
||||
translation : bool,
|
||||
language : str):
|
||||
"""
|
||||
Shortcut method for the AutoTranscribe task.
|
||||
Shortcut method for the Scraibe task.
|
||||
|
||||
Returns:
|
||||
tuple: Transcribed text (str), JSON output (dict)
|
||||
@@ -209,10 +209,10 @@ class GradioTranscriptionInterface:
|
||||
# Gradio Interface
|
||||
####
|
||||
|
||||
def gradio_Interface(model : AutoTranscribe = None):
|
||||
def gradio_Interface(model : Scraibe = None):
|
||||
|
||||
if model is None:
|
||||
model = AutoTranscribe()
|
||||
model = Scraibe()
|
||||
|
||||
pipe = GradioTranscriptionInterface(model)
|
||||
|
||||
|
||||
+22
-15
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
AutoTranscribe Class
|
||||
Scraibe Class
|
||||
--------------------
|
||||
|
||||
This class serves as the core of the transcription system, responsible for handling
|
||||
@@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo
|
||||
integration into various applications, ranging from transcription services to voice assistants.
|
||||
|
||||
Available Classes:
|
||||
- AutoTranscribe: Main class for performing transcription and diarization.
|
||||
- Scraibe: Main class for performing transcription and diarization.
|
||||
Includes methods for loading models, processing audio files,
|
||||
and formatting the transcription output.
|
||||
|
||||
Usage:
|
||||
from .autotranscribe import AutoTranscribe
|
||||
from scraibe import Scraibe
|
||||
|
||||
model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model")
|
||||
transcript = model.transcribe("path/to/audiofile.wav")
|
||||
model = Scraibe()
|
||||
transcript = model.autotranscribe("path/to/audiofile.wav")
|
||||
"""
|
||||
|
||||
# Standard Library Imports
|
||||
@@ -45,9 +45,9 @@ from .transcript_exporter import Transcript
|
||||
DiarisationType = TypeVar('DiarisationType')
|
||||
|
||||
|
||||
class AutoTranscribe:
|
||||
class Scraibe:
|
||||
"""
|
||||
AutoTranscribe is a class responsible for managing the transcription and diarization of audio files.
|
||||
Scraibe is a class responsible for managing the transcription and diarization of audio files.
|
||||
It serves as the core of the transcription system, incorporating pretrained models
|
||||
for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
|
||||
allowing for comprehensive audio processing.
|
||||
@@ -57,7 +57,7 @@ class AutoTranscribe:
|
||||
diariser (Diariser): The diariser object to handle diarization.
|
||||
|
||||
Methods:
|
||||
__init__: Initializes the AutoTranscribe class with appropriate models.
|
||||
__init__: Initializes the Scraibe class with appropriate models.
|
||||
transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
|
||||
remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
|
||||
get_audio_file: Gets an audio file as an AudioProcessor object.
|
||||
@@ -66,7 +66,7 @@ class AutoTranscribe:
|
||||
whisper_model: Union[bool, str, whisper] = None,
|
||||
dia_model : Union[bool, str, DiarisationType] = None,
|
||||
**kwargs) -> None:
|
||||
"""Initializes the AutoTranscribe class.
|
||||
"""Initializes the Scraibe class.
|
||||
|
||||
Args:
|
||||
whisper_model (Union[bool, str, whisper], optional):
|
||||
@@ -92,7 +92,11 @@ class AutoTranscribe:
|
||||
else:
|
||||
self.diariser = dia_model
|
||||
|
||||
print("AutoTranscribe initialized all models successfully loaded.")
|
||||
if kwargs.get("verbose"):
|
||||
print("Scraibe initialized all models successfully loaded.")
|
||||
self.verbose = True
|
||||
else:
|
||||
self.verbose = False
|
||||
|
||||
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||
remove_original : bool = False,
|
||||
@@ -112,7 +116,8 @@ class AutoTranscribe:
|
||||
Transcript: A Transcript object containing the transcription,
|
||||
which can be exported to different formats.
|
||||
"""
|
||||
|
||||
if kwargs.get("verbose"):
|
||||
self.verbose = kwargs.get("verbose")
|
||||
# Get audio file as an AudioProcessor object
|
||||
audio_file = self.get_audio_file(audio_file)
|
||||
|
||||
@@ -122,7 +127,8 @@ class AutoTranscribe:
|
||||
"sample_rate": audio_file.sr
|
||||
}
|
||||
|
||||
print("Starting diarisation.")
|
||||
if self.verbose:
|
||||
print("Starting diarisation.")
|
||||
|
||||
diarisation = self.diariser.diarization(dia_audio, **kwargs)
|
||||
|
||||
@@ -137,14 +143,15 @@ class AutoTranscribe:
|
||||
|
||||
return Transcript(final_transcript)
|
||||
|
||||
print("Diarisation finished. Starting transcription.")
|
||||
if self.verbose:
|
||||
print("Diarisation finished. Starting transcription.")
|
||||
|
||||
audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
|
||||
|
||||
# Transcribe each segment and store the results
|
||||
final_transcript = dict()
|
||||
|
||||
for i in trange(len(diarisation["segments"]), desc= "Transcribing"):
|
||||
for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose):
|
||||
|
||||
seg = diarisation["segments"][i]
|
||||
|
||||
@@ -280,4 +287,4 @@ class AutoTranscribe:
|
||||
return audio_file
|
||||
|
||||
def __repr__(self):
|
||||
return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})"
|
||||
return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})"
|
||||
|
||||
+5
-7
@@ -1,5 +1,5 @@
|
||||
"""
|
||||
Command-Line Interface (CLI) for the AutoTranscribe class,
|
||||
Command-Line Interface (CLI) for the Scraibe class,
|
||||
allowing for user interaction to transcribe and diarize audio files.
|
||||
The function includes arguments for specifying the audio files, model paths,
|
||||
output formats, and other options necessary for transcription.
|
||||
@@ -8,9 +8,7 @@ import os
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
import json
|
||||
|
||||
from sympy import use
|
||||
|
||||
from .autotranscript import AutoTranscribe
|
||||
from .autotranscript import Scraibe
|
||||
from .app.gradio_app import gradio_Interface
|
||||
|
||||
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
||||
@@ -20,12 +18,12 @@ from torch import set_num_threads
|
||||
|
||||
def cli():
|
||||
"""
|
||||
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe
|
||||
Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe
|
||||
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
|
||||
output formats, and other options necessary for transcription.
|
||||
|
||||
This function can be executed from the command line to perform transcription tasks, providing a
|
||||
user-friendly way to access the AutoTranscribe class functionalities.
|
||||
user-friendly way to access the Scraibe class functionalities.
|
||||
"""
|
||||
|
||||
def str2bool(string):
|
||||
@@ -115,7 +113,7 @@ def cli():
|
||||
if arg_dict["whisper_model_directory"]:
|
||||
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
|
||||
|
||||
model = AutoTranscribe(**class_kwargs)
|
||||
model = Scraibe(**class_kwargs)
|
||||
|
||||
|
||||
if arg_dict["audio_files"]:
|
||||
|
||||
@@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper")
|
||||
PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
|
||||
PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
|
||||
|
||||
|
||||
def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
|
||||
"""Configure diarization pipeline from a YAML file.
|
||||
|
||||
|
||||
@@ -90,8 +90,8 @@ class Transcriber:
|
||||
|
||||
kwargs = self._get_whisper_kwargs(**kwargs)
|
||||
|
||||
if "verbose" not in kwargs:
|
||||
kwargs["verbose"] = False
|
||||
if not kwargs.get("verbose"):
|
||||
kwargs["verbose"] = None
|
||||
|
||||
result = self.model.transcribe(audio, *args, **kwargs)
|
||||
return result["text"]
|
||||
|
||||
+3
-31
@@ -1,36 +1,8 @@
|
||||
# import os
|
||||
# import sys
|
||||
# import traceback
|
||||
|
||||
# class TracePrints(object):
|
||||
# def __init__(self):
|
||||
# self.stdout = sys.stdout
|
||||
# def write(self, s):
|
||||
# self.stdout.write("Writing %r\n" % s)
|
||||
# traceback.print_stack(file=self.stdout)
|
||||
from scraibe import Scraibe
|
||||
model = Scraibe()
|
||||
|
||||
# sys.stdout = TracePrints()
|
||||
|
||||
# os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote")
|
||||
# import os
|
||||
|
||||
# os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
|
||||
# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
|
||||
|
||||
|
||||
from scraibe import AutoTranscribe
|
||||
model = AutoTranscribe()
|
||||
|
||||
text = model.autotranscribe('kida.mp4', num_speakers=2)
|
||||
text = model.autotranscribe('kida.mp4', num_speakers=2)
|
||||
|
||||
print("Transcription:\n")
|
||||
print(text)
|
||||
|
||||
# from autotranscript.misc import *
|
||||
# import os
|
||||
|
||||
# print(os.path.exists(CACHE_DIR))
|
||||
# print(os.path.exists(WHISPER_DEFAULT_PATH))
|
||||
# print(os.path.exists(PYANNOTE_DEFAULT_PATH))
|
||||
|
||||
# print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))
|
||||
|
||||
Reference in New Issue
Block a user