change class name from AutoTranscribe to Scraibe
This commit is contained in:
@@ -3,7 +3,7 @@ Gradio Audio Transcription App.
|
|||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
||||||
This module provides an interface to transcribe audio files using the
|
This module provides an interface to transcribe audio files using the
|
||||||
AutoTranscribe model. Users can either upload an audio file or record their speech
|
Scraibe model. Users can either upload an audio file or record their speech
|
||||||
live for transcription. The application supports multiple languages and provides
|
live for transcription. The application supports multiple languages and provides
|
||||||
options to specify the number of speakers and the language of the audio.
|
options to specify the number of speakers and the language of the audio.
|
||||||
|
|
||||||
@@ -20,7 +20,7 @@ Gradio Audio Transcription App.
|
|||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
||||||
This module provides an interface to transcribe audio files using the
|
This module provides an interface to transcribe audio files using the
|
||||||
AutoTranscribe model. Users can either upload an audio file or record their speech
|
Scraibe model. Users can either upload an audio file or record their speech
|
||||||
live for transcription. The application supports multiple languages and provides
|
live for transcription. The application supports multiple languages and provides
|
||||||
options to specify the number of speakers and the language of the audio.
|
options to specify the number of speakers and the language of the audio.
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ import json
|
|||||||
import gradio as gr
|
import gradio as gr
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from scraibe import AutoTranscribe, Transcript
|
from scraibe import Scraibe, Transcript
|
||||||
|
|
||||||
theme = gr.themes.Soft(
|
theme = gr.themes.Soft(
|
||||||
primary_hue="green",
|
primary_hue="green",
|
||||||
@@ -65,12 +65,12 @@ class GradioTranscriptionInterface:
|
|||||||
Interface handling the interaction between Gradio UI and the Audio Transcription system.
|
Interface handling the interaction between Gradio UI and the Audio Transcription system.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, model: AutoTranscribe):
|
def __init__(self, model: Scraibe):
|
||||||
"""
|
"""
|
||||||
Initializes the GradioTranscriptionInterface with a transcription model.
|
Initializes the GradioTranscriptionInterface with a transcription model.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
model (AutoTranscribe): Model responsible for audio transcription tasks.
|
model (Scraibe): Model responsible for audio transcription tasks.
|
||||||
"""
|
"""
|
||||||
self.model = model
|
self.model = model
|
||||||
|
|
||||||
@@ -79,7 +79,7 @@ class GradioTranscriptionInterface:
|
|||||||
translation : bool,
|
translation : bool,
|
||||||
language : str):
|
language : str):
|
||||||
"""
|
"""
|
||||||
Shortcut method for the AutoTranscribe task.
|
Shortcut method for the Scraibe task.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: Transcribed text (str), JSON output (dict)
|
tuple: Transcribed text (str), JSON output (dict)
|
||||||
@@ -209,10 +209,10 @@ class GradioTranscriptionInterface:
|
|||||||
# Gradio Interface
|
# Gradio Interface
|
||||||
####
|
####
|
||||||
|
|
||||||
def gradio_Interface(model : AutoTranscribe = None):
|
def gradio_Interface(model : Scraibe = None):
|
||||||
|
|
||||||
if model is None:
|
if model is None:
|
||||||
model = AutoTranscribe()
|
model = Scraibe()
|
||||||
|
|
||||||
pipe = GradioTranscriptionInterface(model)
|
pipe = GradioTranscriptionInterface(model)
|
||||||
|
|
||||||
|
|||||||
+23
-16
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
AutoTranscribe Class
|
Scraibe Class
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
This class serves as the core of the transcription system, responsible for handling
|
This class serves as the core of the transcription system, responsible for handling
|
||||||
@@ -12,15 +12,15 @@ By encapsulating the complexities of underlying models, it allows for straightfo
|
|||||||
integration into various applications, ranging from transcription services to voice assistants.
|
integration into various applications, ranging from transcription services to voice assistants.
|
||||||
|
|
||||||
Available Classes:
|
Available Classes:
|
||||||
- AutoTranscribe: Main class for performing transcription and diarization.
|
- Scraibe: Main class for performing transcription and diarization.
|
||||||
Includes methods for loading models, processing audio files,
|
Includes methods for loading models, processing audio files,
|
||||||
and formatting the transcription output.
|
and formatting the transcription output.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
from .autotranscribe import AutoTranscribe
|
from scraibe import Scraibe
|
||||||
|
|
||||||
model = AutoTranscribe(whisper_model="path/to/whisper/model", dia_model="path/to/diarisation/model")
|
model = Scraibe()
|
||||||
transcript = model.transcribe("path/to/audiofile.wav")
|
transcript = model.autotranscribe("path/to/audiofile.wav")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Standard Library Imports
|
# Standard Library Imports
|
||||||
@@ -45,9 +45,9 @@ from .transcript_exporter import Transcript
|
|||||||
DiarisationType = TypeVar('DiarisationType')
|
DiarisationType = TypeVar('DiarisationType')
|
||||||
|
|
||||||
|
|
||||||
class AutoTranscribe:
|
class Scraibe:
|
||||||
"""
|
"""
|
||||||
AutoTranscribe is a class responsible for managing the transcription and diarization of audio files.
|
Scraibe is a class responsible for managing the transcription and diarization of audio files.
|
||||||
It serves as the core of the transcription system, incorporating pretrained models
|
It serves as the core of the transcription system, incorporating pretrained models
|
||||||
for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
|
for speech-to-text (such as Whisper) and speaker diarization (such as pyannote.audio),
|
||||||
allowing for comprehensive audio processing.
|
allowing for comprehensive audio processing.
|
||||||
@@ -57,7 +57,7 @@ class AutoTranscribe:
|
|||||||
diariser (Diariser): The diariser object to handle diarization.
|
diariser (Diariser): The diariser object to handle diarization.
|
||||||
|
|
||||||
Methods:
|
Methods:
|
||||||
__init__: Initializes the AutoTranscribe class with appropriate models.
|
__init__: Initializes the Scraibe class with appropriate models.
|
||||||
transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
|
transcribe: Transcribes an audio file using the whisper model and pyannote diarization model.
|
||||||
remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
|
remove_audio_file: Removes the original audio file to avoid disk space issues or ensure data privacy.
|
||||||
get_audio_file: Gets an audio file as an AudioProcessor object.
|
get_audio_file: Gets an audio file as an AudioProcessor object.
|
||||||
@@ -66,7 +66,7 @@ class AutoTranscribe:
|
|||||||
whisper_model: Union[bool, str, whisper] = None,
|
whisper_model: Union[bool, str, whisper] = None,
|
||||||
dia_model : Union[bool, str, DiarisationType] = None,
|
dia_model : Union[bool, str, DiarisationType] = None,
|
||||||
**kwargs) -> None:
|
**kwargs) -> None:
|
||||||
"""Initializes the AutoTranscribe class.
|
"""Initializes the Scraibe class.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
whisper_model (Union[bool, str, whisper], optional):
|
whisper_model (Union[bool, str, whisper], optional):
|
||||||
@@ -92,7 +92,11 @@ class AutoTranscribe:
|
|||||||
else:
|
else:
|
||||||
self.diariser = dia_model
|
self.diariser = dia_model
|
||||||
|
|
||||||
print("AutoTranscribe initialized all models successfully loaded.")
|
if kwargs.get("verbose"):
|
||||||
|
print("Scraibe initialized all models successfully loaded.")
|
||||||
|
self.verbose = True
|
||||||
|
else:
|
||||||
|
self.verbose = False
|
||||||
|
|
||||||
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||||
remove_original : bool = False,
|
remove_original : bool = False,
|
||||||
@@ -112,7 +116,8 @@ class AutoTranscribe:
|
|||||||
Transcript: A Transcript object containing the transcription,
|
Transcript: A Transcript object containing the transcription,
|
||||||
which can be exported to different formats.
|
which can be exported to different formats.
|
||||||
"""
|
"""
|
||||||
|
if kwargs.get("verbose"):
|
||||||
|
self.verbose = kwargs.get("verbose")
|
||||||
# Get audio file as an AudioProcessor object
|
# Get audio file as an AudioProcessor object
|
||||||
audio_file = self.get_audio_file(audio_file)
|
audio_file = self.get_audio_file(audio_file)
|
||||||
|
|
||||||
@@ -121,8 +126,9 @@ class AutoTranscribe:
|
|||||||
"waveform" : audio_file.waveform.reshape(1,len(audio_file.waveform)),
|
"waveform" : audio_file.waveform.reshape(1,len(audio_file.waveform)),
|
||||||
"sample_rate": audio_file.sr
|
"sample_rate": audio_file.sr
|
||||||
}
|
}
|
||||||
|
|
||||||
print("Starting diarisation.")
|
if self.verbose:
|
||||||
|
print("Starting diarisation.")
|
||||||
|
|
||||||
diarisation = self.diariser.diarization(dia_audio, **kwargs)
|
diarisation = self.diariser.diarization(dia_audio, **kwargs)
|
||||||
|
|
||||||
@@ -137,14 +143,15 @@ class AutoTranscribe:
|
|||||||
|
|
||||||
return Transcript(final_transcript)
|
return Transcript(final_transcript)
|
||||||
|
|
||||||
print("Diarisation finished. Starting transcription.")
|
if self.verbose:
|
||||||
|
print("Diarisation finished. Starting transcription.")
|
||||||
|
|
||||||
audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
|
audio_file.sr = torch.Tensor([audio_file.sr]).to(audio_file.waveform.device)
|
||||||
|
|
||||||
# Transcribe each segment and store the results
|
# Transcribe each segment and store the results
|
||||||
final_transcript = dict()
|
final_transcript = dict()
|
||||||
|
|
||||||
for i in trange(len(diarisation["segments"]), desc= "Transcribing"):
|
for i in trange(len(diarisation["segments"]), desc= "Transcribing", disable = not self.verbose):
|
||||||
|
|
||||||
seg = diarisation["segments"][i]
|
seg = diarisation["segments"][i]
|
||||||
|
|
||||||
@@ -280,4 +287,4 @@ class AutoTranscribe:
|
|||||||
return audio_file
|
return audio_file
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"AutoTranscribe(transcriber={self.transcriber}, diariser={self.diariser})"
|
return f"Scraibe(transcriber={self.transcriber}, diariser={self.diariser})"
|
||||||
|
|||||||
+5
-7
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Command-Line Interface (CLI) for the AutoTranscribe class,
|
Command-Line Interface (CLI) for the Scraibe class,
|
||||||
allowing for user interaction to transcribe and diarize audio files.
|
allowing for user interaction to transcribe and diarize audio files.
|
||||||
The function includes arguments for specifying the audio files, model paths,
|
The function includes arguments for specifying the audio files, model paths,
|
||||||
output formats, and other options necessary for transcription.
|
output formats, and other options necessary for transcription.
|
||||||
@@ -8,9 +8,7 @@ import os
|
|||||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from sympy import use
|
from .autotranscript import Scraibe
|
||||||
|
|
||||||
from .autotranscript import AutoTranscribe
|
|
||||||
from .app.gradio_app import gradio_Interface
|
from .app.gradio_app import gradio_Interface
|
||||||
|
|
||||||
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
||||||
@@ -20,12 +18,12 @@ from torch import set_num_threads
|
|||||||
|
|
||||||
def cli():
|
def cli():
|
||||||
"""
|
"""
|
||||||
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe
|
Command-Line Interface (CLI) for the Scraibe class, allowing for user interaction to transcribe
|
||||||
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
|
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
|
||||||
output formats, and other options necessary for transcription.
|
output formats, and other options necessary for transcription.
|
||||||
|
|
||||||
This function can be executed from the command line to perform transcription tasks, providing a
|
This function can be executed from the command line to perform transcription tasks, providing a
|
||||||
user-friendly way to access the AutoTranscribe class functionalities.
|
user-friendly way to access the Scraibe class functionalities.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def str2bool(string):
|
def str2bool(string):
|
||||||
@@ -115,7 +113,7 @@ def cli():
|
|||||||
if arg_dict["whisper_model_directory"]:
|
if arg_dict["whisper_model_directory"]:
|
||||||
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
|
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
|
||||||
|
|
||||||
model = AutoTranscribe(**class_kwargs)
|
model = Scraibe(**class_kwargs)
|
||||||
|
|
||||||
|
|
||||||
if arg_dict["audio_files"]:
|
if arg_dict["audio_files"]:
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper")
|
|||||||
PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
|
PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote")
|
||||||
PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
|
PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")
|
||||||
|
|
||||||
|
|
||||||
def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
|
def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None:
|
||||||
"""Configure diarization pipeline from a YAML file.
|
"""Configure diarization pipeline from a YAML file.
|
||||||
|
|
||||||
|
|||||||
@@ -90,8 +90,8 @@ class Transcriber:
|
|||||||
|
|
||||||
kwargs = self._get_whisper_kwargs(**kwargs)
|
kwargs = self._get_whisper_kwargs(**kwargs)
|
||||||
|
|
||||||
if "verbose" not in kwargs:
|
if not kwargs.get("verbose"):
|
||||||
kwargs["verbose"] = False
|
kwargs["verbose"] = None
|
||||||
|
|
||||||
result = self.model.transcribe(audio, *args, **kwargs)
|
result = self.model.transcribe(audio, *args, **kwargs)
|
||||||
return result["text"]
|
return result["text"]
|
||||||
|
|||||||
+3
-31
@@ -1,36 +1,8 @@
|
|||||||
# import os
|
|
||||||
# import sys
|
|
||||||
# import traceback
|
|
||||||
|
|
||||||
# class TracePrints(object):
|
from scraibe import Scraibe
|
||||||
# def __init__(self):
|
model = Scraibe()
|
||||||
# self.stdout = sys.stdout
|
|
||||||
# def write(self, s):
|
|
||||||
# self.stdout.write("Writing %r\n" % s)
|
|
||||||
# traceback.print_stack(file=self.stdout)
|
|
||||||
|
|
||||||
# sys.stdout = TracePrints()
|
text = model.autotranscocribe('kida.mp4', num_speakers=2)
|
||||||
|
|
||||||
# os.environ["PYANNOTE_CACHE"] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models/pyannote")
|
|
||||||
# import os
|
|
||||||
|
|
||||||
# os.environ['TRANSFORMERS_CACHE'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
|
|
||||||
# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
|
|
||||||
|
|
||||||
|
|
||||||
from scraibe import AutoTranscribe
|
|
||||||
model = AutoTranscribe()
|
|
||||||
|
|
||||||
text = model.autotranscribe('kida.mp4', num_speakers=2)
|
|
||||||
|
|
||||||
print("Transcription:\n")
|
print("Transcription:\n")
|
||||||
print(text)
|
print(text)
|
||||||
|
|
||||||
# from autotranscript.misc import *
|
|
||||||
# import os
|
|
||||||
|
|
||||||
# print(os.path.exists(CACHE_DIR))
|
|
||||||
# print(os.path.exists(WHISPER_DEFAULT_PATH))
|
|
||||||
# print(os.path.exists(PYANNOTE_DEFAULT_PATH))
|
|
||||||
|
|
||||||
# print(os.path.exists(PYANNOTE_DEFAULT_CONFIG))
|
|
||||||
|
|||||||
Reference in New Issue
Block a user