change class name from AutoTranscribe to Scraibe

This commit is contained in:
Jaikinator
2023-09-22 13:32:55 +02:00
parent bc1dd9d541
commit 1707777c64
6 changed files with 41 additions and 65 deletions
+8 -8
View File
@@ -3,7 +3,7 @@ Gradio Audio Transcription App.
-------------------------------- --------------------------------
This module provides an interface to transcribe audio files using the This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech Scraibe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio. options to specify the number of speakers and the language of the audio.
@@ -20,7 +20,7 @@ Gradio Audio Transcription App.
-------------------------------- --------------------------------
This module provides an interface to transcribe audio files using the This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech Scraibe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio. options to specify the number of speakers and the language of the audio.
@@ -37,7 +37,7 @@ import json
import gradio as gr import gradio as gr
from tqdm import tqdm from tqdm import tqdm
from scraibe import AutoTranscribe, Transcript from scraibe import Scraibe, Transcript
theme = gr.themes.Soft( theme = gr.themes.Soft(
primary_hue="green", primary_hue="green",
@@ -65,12 +65,12 @@ class GradioTranscriptionInterface:
Interface handling the interaction between Gradio UI and the Audio Transcription system. Interface handling the interaction between Gradio UI and the Audio Transcription system.
""" """
def __init__(self, model: AutoTranscribe): def __init__(self, model: Scraibe):
""" """
Initializes the GradioTranscriptionInterface with a transcription model. Initializes the GradioTranscriptionInterface with a transcription model.
Args: Args:
model (AutoTranscribe): Model responsible for audio transcription tasks. model (Scraibe): Model responsible for audio transcription tasks.
""" """
self.model = model self.model = model
@@ -79,7 +79,7 @@ class GradioTranscriptionInterface:
translation : bool, translation : bool,
language : str): language : str):
""" """
Shortcut method for the AutoTranscribe task. Shortcut method for the Scraibe task.
Returns: Returns:
tuple: Transcribed text (str), JSON output (dict) tuple: Transcribed text (str), JSON output (dict)
@@ -209,10 +209,10 @@ class GradioTranscriptionInterface:
# Gradio Interface # Gradio Interface
#### ####
def gradio_Interface(model : AutoTranscribe = None): def gradio_Interface(model : Scraibe = None):
if model is None: if model is None:
model = AutoTranscribe() model = Scraibe()
pipe = GradioTranscriptionInterface(model) pipe = GradioTranscriptionInterface(model)
+22 -15
View File
@@ -1,5 +1,5 @@
""" """
AutoTranscribe Class Scraibe Class
-------------------- --------------------
This class serves as the core of the transcription system, responsible for handling This class serves as the core of the transcription system, responsible for handling
@@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo
integration into various applications, ranging from transcription services to voice assistants. integration into various applications, ranging from transcription services to voice assistants.
Available Classes: Available Classes:
- AutoTranscribe: Main class for performing transcription and diarization. - Scraibe: Main class for performing transcription and diarization.
Includes methods for loading models, processing audio files, Includes methods for loading models, processing audio files,
and formatting the transcription output. and formatting the transcription output.
Usage: Usage:
from .autotranscribe import AutoTranscribe from scraibe import Scraibe
model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model") model = Scraibe()
transcript = model.transcribe("path/to/audiofile.wav") transcript = model.autotranscribe("path/to/audiofile.wav")
""" """
# Standard Library Imports # Standard Library Imports
@@ -45,9 +45,9 @@ from .transcript_exporter import Transcript
DiarisationType = TypeVar('DiarisationType') DiarisationType = TypeVar('DiarisationType')
class AutoTranscribe: class Scraibe:
""" """
AutoTranscribe is a class responsible for managing the transcription and diarization of audio files. Scraibe is a class responsible for managing the transcription and diarization of audio files.
It serves as the core of the transcription system, incorporating pretrained models It serves as the core of the transcription system, incorporating pretrained models
for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio), for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
allowing for comprehensive audio processing. allowing for comprehensive audio processing.
@@ -57,7 +57,7 @@ class AutoTranscribe:
diariser (Diariser): The diariser object to handle diarization. diariser (Diariser): The diariser object to handle diarization.
Methods: Methods:
__init__: Initializes the AutoTranscribe class with appropriate models. __init__: Initializes the Scraibe class with appropriate models.
transcribe: Transcribes an audio file using the whisper model and pyannote diarization model. transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy. remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
get_audio_file: Gets an audio file as an AudioProcessor object. get_audio_file: Gets an audio file as an AudioProcessor object.
@@ -66,7 +66,7 @@ class AutoTranscribe:
whisper_model: Union[bool, str, whisper] = None, whisper_model: Union[bool, str, whisper] = None,
dia_model : Union[bool, str, DiarisationType] = None, dia_model : Union[bool, str, DiarisationType] = None,
**kwargs) -> None: **kwargs) -> None:
"""Initializes the AutoTranscribe class. """Initializes the Scraibe class.
Args: Args:
whisper_model (Union[bool, str, whisper], optional): whisper_model (Union[bool, str, whisper], optional):
@@ -92,7 +92,11 @@ class AutoTranscribe:
else: else:
self.diariser = dia_model self.diariser = dia_model
print("AutoTranscribe initialized all models successfully loaded.") if kwargs.get("verbose"):
print("Scraibe initialized all models successfully loaded.")
self.verbose = True
else:
self.verbose = False
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray], def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
remove_original : bool = False, remove_original : bool = False,
@@ -112,7 +116,8 @@ class AutoTranscribe:
Transcript: A Transcript object containing the transcription, Transcript: A Transcript object containing the transcription,
which can be exported to different formats. which can be exported to different formats.
""" """
if kwargs.get("verbose"):
self.verbose = kwargs.get("verbose")
# Get audio file as an AudioProcessor object # Get audio file as an AudioProcessor object
audio_file = self.get_audio_file(audio_file) audio_file = self.get_audio_file(audio_file)
@@ -122,7 +127,8 @@ class AutoTranscribe:
"sample_rate": audio_file.sr "sample_rate": audio_file.sr
} }
print("Starting diarisation.") if self.verbose:
print("Starting diarisation.")
diarisation = self.diariser.diarization(dia_audio, **kwargs) diarisation = self.diariser.diarization(dia_audio, **kwargs)
@@ -137,14 +143,15 @@ class AutoTranscribe:
return Transcript(final_transcript) return Transcript(final_transcript)
print("Diarisation finished. Starting transcription.") if self.verbose:
print("Diarisation finished. Starting transcription.")
audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device) audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
# Transcribe each segment and store the results # Transcribe each segment and store the results
final_transcript = dict() final_transcript = dict()
for i in trange(len(diarisation["segments"]), desc= "Transcribing"): for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose):
seg = diarisation["segments"][i] seg = diarisation["segments"][i]
@@ -280,4 +287,4 @@ class AutoTranscribe:
return audio_file return audio_file
def __repr__(self): def __repr__(self):
return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})" return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})"
+5 -7
View File
@@ -1,5 +1,5 @@
""" """
Command-Line Interface (CLI) for the AutoTranscribe class, Command-Line Interface (CLI) for the Scraibe class,
allowing for user interaction to transcribe and diarize audio files. allowing for user interaction to transcribe and diarize audio files.
The function includes arguments for specifying the audio files, model paths, The function includes arguments for specifying the audio files, model paths,
output formats, and other options necessary for transcription. output formats, and other options necessary for transcription.
@@ -8,9 +8,7 @@ import os
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import json import json
from sympy import use from .autotranscript import Scraibe
from .autotranscript import AutoTranscribe
from .app.gradio_app import gradio_Interface from .app.gradio_app import gradio_Interface
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
@@ -20,12 +18,12 @@ from torch import set_num_threads
def cli(): def cli():
""" """
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe
and diarize audio files. The function includes arguments for specifying the audio files, model paths, and diarize audio files. The function includes arguments for specifying the audio files, model paths,
output formats, and other options necessary for transcription. output formats, and other options necessary for transcription.
This function can be executed from the command line to perform transcription tasks, providing a This function can be executed from the command line to perform transcription tasks, providing a
user-friendly way to access the AutoTranscribe class functionalities. user-friendly way to access the Scraibe class functionalities.
""" """
def str2bool(string): def str2bool(string):
@@ -115,7 +113,7 @@ def cli():
if arg_dict["whisper_model_directory"]: if arg_dict["whisper_model_directory"]:
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory") class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
model = AutoTranscribe(**class_kwargs) model = Scraibe(**class_kwargs)
if arg_dict["audio_files"]: if arg_dict["audio_files"]:
-1
View File
@@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper")
PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote") PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None: def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
"""Configure diarization pipeline from a YAML file. """Configure diarization pipeline from a YAML file.
+2 -2
View File
@@ -90,8 +90,8 @@ class Transcriber:
kwargs = self._get_whisper_kwargs(**kwargs) kwargs = self._get_whisper_kwargs(**kwargs)
if "verbose" not in kwargs: if not kwargs.get("verbose"):
kwargs["verbose"] = False kwargs["verbose"] = None
result = self.model.transcribe(audio, *args, **kwargs) result = self.model.transcribe(audio, *args, **kwargs)
return result["text"] return result["text"]
+3 -31
View File
@@ -1,36 +1,8 @@
# import os
# import sys
# import traceback
# class TracePrints(object): from scraibe import Scraibe
# def __init__(self): model = Scraibe()
# self.stdout = sys.stdout
# def write(self, s):
# self.stdout.write("Writing %r\n" % s)
# traceback.print_stack(file=self.stdout)
# sys.stdout = TracePrints() text = model.autotranscribe('kida.mp4', num_speakers=2)
# os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote")
# import os
# os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
from scraibe import AutoTranscribe
model = AutoTranscribe()
text = model.autotranscribe('kida.mp4', num_speakers=2)
print("Transcription:\n") print("Transcription:\n")
print(text) print(text)
# from autotranscript.misc import *
# import os
# print(os.path.exists(CACHE_DIR))
# print(os.path.exists(WHISPER_DEFAULT_PATH))
# print(os.path.exists(PYANNOTE_DEFAULT_PATH))
# print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))