Merge branch 'develop' into pipy-package

This commit is contained in:
Jacob Schmieder
2023-08-30 10:42:31 +02:00
committed by GitHub
5 changed files with 58 additions and 17 deletions
+5 -1
View File
@@ -1,11 +1,15 @@
from .autotranscript import * from .autotranscript import *
from .app.qtfaststart import *
from .transcriber import * from .transcriber import *
from .audio import * from .audio import *
from .transcript_exporter import * from .transcript_exporter import *
from .diarisation import * from .diarisation import *
from .version import get_version as _get_version from .version import get_version as _get_version
from .misc import * from .misc import *
from .app.gradio_app import *
from .app.qtfaststart import *
from .cli import * from .cli import *
__version__ = _get_version() __version__ = _get_version()
+2 -1
View File
@@ -1 +1,2 @@
from .qtfaststart import * from .qtfaststart import *
from .gradio_app import *
@@ -1,3 +1,20 @@
"""
Gradio Audio Transcription App.
--------------------------------
This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio.
Attributes:
LANGUAGES (list): A list of supported languages for transcription.
Usage:
Run this script to start the Gradio web interface for audio transcription.
"""
from autotranscript import AutoTranscribe from autotranscript import AutoTranscribe
import gradio as gr import gradio as gr
@@ -18,13 +35,32 @@ LANGUAGES = [
def gradio_server(model : AutoTranscribe): def gradio_server(model : AutoTranscribe):
"""
Sets up and launches the Gradio interface for audio transcription.
Args:
model (AutoTranscribe): An instance of the AutoTranscribe model for transcription.
"""
def transcribe(audio, microphone, number_of_speakers, language): def transcribe(audio, microphone, number_of_speakers, language):
"""
Transcribes the provided audio input based on the given parameters.
Args:
audio (str): Filepath to the uploaded audio file.
microphone (str): Filepath to the recorded audio.
number_of_speakers (int): Number of speakers in the audio.
language (str): Language of the audio content.
Returns:
tuple: Transcribed text (str), JSON output (dict)
"""
kwargs = {} kwargs = {}
if number_of_speakers != 0: if number_of_speakers != 0:
kwargs["num_speakers"] = number_of_speakers kwargs["num_speakers"] = number_of_speakers
if language != "None": if language != "None":
kwargs["language"] = language kwargs["language"] = language
print()
if audio is not None: if audio is not None:
out = model.transcribe(audio, **kwargs) out = model.transcribe(audio, **kwargs)
@@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe):
else: else:
out = "Please upload an audio file or record one." out = "Please upload an audio file or record one."
return str(out), out.get_json(), out.get_md()
return str(out)
gr.Interface( gr.Interface(
fn=transcribe, fn=transcribe,
inputs=[ inputs=[
gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True), gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File",
gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True), interactive=True),
gr.Number(value=0, label= "Number of speakers", gr.Audio(source= "microphone", type="filepath", label="Record Your Audio",
interactive=True, container= False),
gr.Number(value=0, label= "Number of speakers (optional)",
info = "Number of speakers in the audio file. If you don't know, leave it at 0."), info = "Number of speakers in the audio file. If you don't know, leave it at 0."),
# gr.Number(value=0, label= "Minimal number of speakers",
# info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."),
gr.Dropdown(LANGUAGES, gr.Dropdown(LANGUAGES,
label="Languages", default="None", label="Language (optional)", value = "None",
info="Language of the audio file. If you don't know, leave it at None.") info="Language of the audio file. If you don't know, leave it at None.")
], ],
outputs=[ outputs=[
"text" gr.Textbox(label="Transcription"),
gr.JSON(label="Raw Output", container= False),
], ],
title="Audio Transcription", title="Audio Transcription",
thumbnail = "Logo_KIDA.png",
description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!", description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
theme="soft", # Example of a more modern theme theme="soft", # Example of a more modern theme
).launch(share=True) server_port=7860,
server_name="autotranscribe",
).queue().launch()
if __name__ == "__main__": if __name__ == "__main__":
+1 -2
View File
@@ -276,5 +276,4 @@ class AutoTranscribe:
if not isinstance(audio_file, AudioProcessor): if not isinstance(audio_file, AudioProcessor):
raise ValueError(f'Audiofile must be of type AudioProcessor,' \ raise ValueError(f'Audiofile must be of type AudioProcessor,' \
f'not {type(audio_file)}') f'not {type(audio_file)}')
return audio_file return audio_file
+2 -2
View File
@@ -99,7 +99,7 @@ class Transcript:
sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0])) sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0]))
eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1])) eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1]))
fstring += f"{speaker} ({sseg} ; {eseg}): {seq['text']}\n" fstring += f"{speaker} ({sseg} ; {eseg}):\t{seq['text']}\n"
return fstring return fstring
def __repr__(self) -> str: def __repr__(self) -> str:
@@ -128,7 +128,7 @@ class Transcript:
:rtype: str :rtype: str
""" """
if "indent" not in kwargs: if "indent" not in kwargs:
kwargs["indent"] = 4 kwargs["indent"] = 3
return json.dumps(self.transcript, *args, **kwargs) return json.dumps(self.transcript, *args, **kwargs)
def get_html(self) -> str: def get_html(self) -> str: