@@ -1,10 +1,13 @@
|
|||||||
from .autotranscript import *
|
from .autotranscript import *
|
||||||
from .app.qtfaststart import *
|
|
||||||
from .transcriber import *
|
from .transcriber import *
|
||||||
from .audio import *
|
from .audio import *
|
||||||
from .transcript_exporter import *
|
from .transcript_exporter import *
|
||||||
from .diarisation import *
|
from .diarisation import *
|
||||||
|
|
||||||
from .version import get_version as _get_version
|
from .version import get_version as _get_version
|
||||||
from .misc import *
|
from .misc import *
|
||||||
|
|
||||||
|
from .app.gradio_app import *
|
||||||
|
from .app.qtfaststart import *
|
||||||
|
|
||||||
__version__ = _get_version()
|
__version__ = _get_version()
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
from .qtfaststart import *
|
from .qtfaststart import *
|
||||||
|
from .gradio_app import *
|
||||||
@@ -1,3 +1,20 @@
|
|||||||
|
"""
|
||||||
|
Gradio Audio Transcription App.
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
This module provides an interface to transcribe audio files using the
|
||||||
|
AutoTranscribe model. Users can either upload an audio file or record their speech
|
||||||
|
live for transcription. The application supports multiple languages and provides
|
||||||
|
options to specify the number of speakers and the language of the audio.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
LANGUAGES (list): A list of supported languages for transcription.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
Run this script to start the Gradio web interface for audio transcription.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
from autotranscript import AutoTranscribe
|
from autotranscript import AutoTranscribe
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
|
|
||||||
@@ -18,13 +35,32 @@ LANGUAGES = [
|
|||||||
|
|
||||||
|
|
||||||
def gradio_server(model : AutoTranscribe):
|
def gradio_server(model : AutoTranscribe):
|
||||||
|
"""
|
||||||
|
Sets up and launches the Gradio interface for audio transcription.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model (AutoTranscribe): An instance of the AutoTranscribe model for transcription.
|
||||||
|
"""
|
||||||
def transcribe(audio, microphone, number_of_speakers, language):
|
def transcribe(audio, microphone, number_of_speakers, language):
|
||||||
|
"""
|
||||||
|
Transcribes the provided audio input based on the given parameters.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
audio (str): Filepath to the uploaded audio file.
|
||||||
|
microphone (str): Filepath to the recorded audio.
|
||||||
|
number_of_speakers (int): Number of speakers in the audio.
|
||||||
|
language (str): Language of the audio content.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple: Transcribed text (str), JSON output (dict)
|
||||||
|
"""
|
||||||
kwargs = {}
|
kwargs = {}
|
||||||
if number_of_speakers != 0:
|
if number_of_speakers != 0:
|
||||||
kwargs["num_speakers"] = number_of_speakers
|
kwargs["num_speakers"] = number_of_speakers
|
||||||
if language != "None":
|
if language != "None":
|
||||||
kwargs["language"] = language
|
kwargs["language"] = language
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
if audio is not None:
|
if audio is not None:
|
||||||
out = model.transcribe(audio, **kwargs)
|
out = model.transcribe(audio, **kwargs)
|
||||||
@@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe):
|
|||||||
else:
|
else:
|
||||||
out = "Please upload an audio file or record one."
|
out = "Please upload an audio file or record one."
|
||||||
|
|
||||||
|
return str(out), out.get_json(), out.get_md()
|
||||||
return str(out)
|
|
||||||
|
|
||||||
gr.Interface(
|
gr.Interface(
|
||||||
fn=transcribe,
|
fn=transcribe,
|
||||||
inputs=[
|
inputs=[
|
||||||
gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True),
|
gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File",
|
||||||
gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True),
|
interactive=True),
|
||||||
gr.Number(value=0, label= "Number of speakers",
|
gr.Audio(source= "microphone", type="filepath", label="Record Your Audio",
|
||||||
|
interactive=True, container= False),
|
||||||
|
gr.Number(value=0, label= "Number of speakers (optional)",
|
||||||
info = "Number of speakers in the audio file. If you don't know, leave it at 0."),
|
info = "Number of speakers in the audio file. If you don't know, leave it at 0."),
|
||||||
# gr.Number(value=0, label= "Minimal number of speakers",
|
|
||||||
# info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."),
|
|
||||||
gr.Dropdown(LANGUAGES,
|
gr.Dropdown(LANGUAGES,
|
||||||
label="Languages", default="None",
|
label="Language (optional)", value = "None",
|
||||||
info="Language of the audio file. If you don't know, leave it at None.")
|
info="Language of the audio file. If you don't know, leave it at None.")
|
||||||
],
|
],
|
||||||
outputs=[
|
outputs=[
|
||||||
"text"
|
gr.Textbox(label="Transcription"),
|
||||||
|
gr.JSON(label="Raw Output", container= False),
|
||||||
],
|
],
|
||||||
title="Audio Transcription",
|
title="Audio Transcription",
|
||||||
thumbnail = "Logo_KIDA.png",
|
|
||||||
description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
|
description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
|
||||||
theme="soft", # Example of a more modern theme
|
theme="soft", # Example of a more modern theme
|
||||||
).launch(share=True)
|
server_port=7860,
|
||||||
|
server_name="autotranscribe",
|
||||||
|
).queue().launch()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@@ -353,7 +353,7 @@ def cli():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
if start_server:
|
if start_server:
|
||||||
from .gradio_app import gradio_app
|
from .app.gradio_app import gradio_app
|
||||||
gradio_app(model)
|
gradio_app(model)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ class Transcript:
|
|||||||
sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0]))
|
sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0]))
|
||||||
eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1]))
|
eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1]))
|
||||||
|
|
||||||
fstring += f"{speaker} ({sseg} ; {eseg}): {seq['text']}\n"
|
fstring += f"{speaker} ({sseg} ; {eseg}):\t{seq['text']}\n"
|
||||||
return fstring
|
return fstring
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
@@ -128,7 +128,7 @@ class Transcript:
|
|||||||
:rtype: str
|
:rtype: str
|
||||||
"""
|
"""
|
||||||
if "indent" not in kwargs:
|
if "indent" not in kwargs:
|
||||||
kwargs["indent"] = 4
|
kwargs["indent"] = 3
|
||||||
return json.dumps(self.transcript, *args, **kwargs)
|
return json.dumps(self.transcript, *args, **kwargs)
|
||||||
|
|
||||||
def get_html(self) -> str:
|
def get_html(self) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user