From bbb2c848e31e04cee3635956e1e84a897de38519 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 20 Nov 2023 15:01:51 +0100 Subject: [PATCH] rework structure of gradio app --- scraibe/__init__.py | 3 +- scraibe/app/__init__.py | 7 +- scraibe/app/activity_tracker.py | 37 +++ scraibe/app/global_var.py | 9 + scraibe/app/gradio_app.py | 504 -------------------------------- scraibe/app/interactions.py | 145 +++++++++ scraibe/app/interface.py | 129 ++++++++ scraibe/app/stg.py | 157 ++++++++++ 8 files changed, 484 insertions(+), 507 deletions(-) create mode 100644 scraibe/app/activity_tracker.py create mode 100644 scraibe/app/global_var.py delete mode 100644 scraibe/app/gradio_app.py create mode 100644 scraibe/app/interactions.py create mode 100644 scraibe/app/interface.py create mode 100644 scraibe/app/stg.py diff --git a/scraibe/__init__.py b/scraibe/__init__.py index a3a2b17..3fd77e8 100644 --- a/scraibe/__init__.py +++ b/scraibe/__init__.py @@ -7,8 +7,7 @@ from .diarisation import * from .version import get_version as _get_version from .misc import * -from .app.gradio_app import * -from .app.qtfaststart import * +from .app import * from .cli import * diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py index dc00e7a..9e04a48 100644 --- a/scraibe/app/__init__.py +++ b/scraibe/app/__init__.py @@ -1,2 +1,7 @@ from .qtfaststart import * -from .gradio_app import * \ No newline at end of file +from .activity_tracker import * +from .interface import * +from .stg import * +from .interactions import * +from .global_var import * +from .app import * \ No newline at end of file diff --git a/scraibe/app/activity_tracker.py b/scraibe/app/activity_tracker.py new file mode 100644 index 0000000..5cced3b --- /dev/null +++ b/scraibe/app/activity_tracker.py @@ -0,0 +1,37 @@ +""" +This file contains the functions which are related to monitoring the actual app usage. +Therefore, the app is to be more efficient in the usage of the resources. +By for example, unloading or reloading the model. +""" +import time +import threading +import torch +import gc +import gradio as gr + + +timeout = 30 #seconds +USER_ACTIVE = True +user_active_lock = threading.Lock() # dummy for now + +# Create a thread to monitor user activity +def monitor_activity(model, pipe, timeout=timeout): + global USER_ACTIVE + + while True: + time.sleep(timeout) # Check user activity every second + with user_active_lock: + + if not USER_ACTIVE: + del model + del pipe + + gc.collect() + torch.cuda.empty_cache() + + + + print("Model deleted empty memory") + gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.") + break + USER_ACTIVE = False \ No newline at end of file diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py new file mode 100644 index 0000000..191e3e6 --- /dev/null +++ b/scraibe/app/global_var.py @@ -0,0 +1,9 @@ +""" +Stores global variables for the app. +""" + +# Global variable to store the model +MODEL = None + +# Global variable to track user activity +USER_ACTIVE = False \ No newline at end of file diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py deleted file mode 100644 index 6913643..0000000 --- a/scraibe/app/gradio_app.py +++ /dev/null @@ -1,504 +0,0 @@ -""" -Gradio Audio Transcription App. --------------------------------- - -This module provides an interface to transcribe audio files using the -Scraibe model. Users can either upload an audio file or record their speech -live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. - -Attributes: - LANGUAGES (list): A list of supported languages for transcription. - -Usage: - Run this script to start the Gradio web interface for audio transcription. - -""" - -""" -Gradio Audio Transcription App. --------------------------------- - -This module provides an interface to transcribe audio files using the -Scraibe model. Users can either upload an audio file or record their speech -live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. - -Attributes: - LANGUAGES (list): A list of supported languages for transcription. - -Usage: - Run this script to start the Gradio web interface for audio transcription. - -""" - - -import json -from math import pi -import os - -import gradio as gr -import threading -from tqdm import tqdm - -import time -from scraibe import Scraibe, Transcript - -theme = gr.themes.Soft( - primary_hue="green", - secondary_hue='orange', - neutral_hue="gray", -) - -LANGUAGES = [ - "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", - "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", - "Czech", "Danish", "Dutch", "English", "Estonian", - "Finnish", "French", "Galician", "German", "Greek", - "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", - "Italian", "Japanese", "Kannada", "Kazakh", "Korean", - "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", - "Maori", "Nepali", "Norwegian", "Persian", "Polish", - "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", - "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", - "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", - "Vietnamese", "Welsh" -] - -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) - - -# Global variable to track user activity -USER_ACTIVE = True - -# Lock to synchronize access to user_active variable -user_active_lock = threading.Lock() - -# Function to reset the user activity flag -def reset_user_activity(): - global USER_ACTIVE - with user_active_lock: - USER_ACTIVE = True - -class GradioTranscriptionInterface: - """ - Interface handling the interaction between Gradio UI and the Audio Transcription system. - """ - - def __init__(self, model: Scraibe): - """ - Initializes the GradioTranscriptionInterface with a transcription model. - - Args: - model (Scraibe): Model responsible for audio transcription tasks. - """ - self.model = model - - def auto_transcribe(self, source, - num_speakers : int, - translation : bool, - language : str): - """ - Shortcut method for the Scraibe task. - - Returns: - tuple: Transcribed text (str), JSON output (dict) - """ - - kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - "language": language if language != "None" else None, - "task": 'translate' if translation else None - } - if isinstance(source, str): - try: - result = self.model.autotranscribe(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - - return str(result), result.get_json() - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - try: - res = self.model.autotranscribe(s, **kwargs) - except ValueError: - _name = s.split("/")[-1] - res = f"NO TRANSCRIPT FOUND FOR {_name}" - gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") - result.append(res) - - out = '' - out_dict = {} - for i, r in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(r) - out += "\n\n" - - if isinstance(r, str): - out_dict[source_names[i]] = r - else: - out_dict[source_names[i]] = r.get_dict() - - return out, json.dumps(out_dict, indent=4) - - else: - raise gr.Error("Please provide a valid audio file.") - - - def transcribe(self, source, translation, language): - """ - Shortcut method for the Transcribe task. - - Returns: - str: Transcribed text. - """ - kwargs = { - "language": language if language != "None" else None, - "task": 'translate' if translation == "Yes" else None - } - - if isinstance(source, str): - result = self.model.transcribe(source, **kwargs) - - return str(result) - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - res = self.model.transcribe(s, **kwargs) - result.append(res) - - out = '' - for i, res in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(res) - out += "\n\n" - - return out - - else: - raise gr.Error("Please provide a valid audio file.") - - def perform_diarisation(self, source, num_speakers): - """ - Shortcut method for the Diarisation task. - - Returns: - str: JSON output of diarisation result. - """ - kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - } - - if isinstance(source, str): - try: - result = self.model.diarization(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - - return json.dumps(result, indent=2) - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Performing diarisation"): - try: - res = self.model.diarization(s, **kwargs) - except ValueError: - res = f"NO DIARISATION FOUND FOR {s}" - gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") - result.append(res) - - out = {} - - for i, res in enumerate(result): - out[source_names[i]] = res - - return json.dumps(out, indent=4) - - else: - gr.Error("Please provide a valid audio file.") - -#### -# Gradio Interface -#### - -def gradio_Interface(model : Scraibe = None, timeout = 1): - """ - Gradio Web interface for audio transcription. - - :param model: Scraibe model, defaults to None - :type model: Scraibe, optional - :param timeout: Time until model is unloaded, defaults to 600 seconds - :type timeout: int, optional - :return: Gradio Interface - :rtype: gradio.Interface - """ - - if model is None: - model = Scraibe() - - save_model_params = model.params - - pipe = GradioTranscriptionInterface(model) - - def select_task(choice): - # tell the app that it is still in use - reset_user_activity() - - if choice == 'Auto Transcribe': - - return (gr.update(visible = True), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Transcribe': - - return (gr.update(visible = False), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Diarisation': - - return (gr.update(visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - def select_origin(choice): - - # tell the app that it is still in use - reset_user_activity() - - if choice == "Upload Audio": - - return (gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Audio": - - return (gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Upload Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None)) - - elif choice == "File or Files": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True)) - - def run_scribe(task, - num_speakers, - translate, - language, - audio1, - audio2, - video1, - video2, - file_in, - progress = gr.Progress(track_tqdm= True)): - # get *args which are not None - - if not "model" in locals(): - gr.Warning("Model unloaded due to inactivity. Reloading the model, please wait.") - model = Scraibe(**save_model_params) - pipe = GradioTranscriptionInterface(model) - # # tell the app that it is still in use - reset_user_activity() - - progress(0, desc='Starting task...') - source = audio1 or audio2 or video1 or video2 or file_in - - if isinstance(source, list): - source = [s.name for s in source] - if len(source) == 1: - source = source[0] - - if task == 'Auto Transcribe': - - out_str , out_json = pipe.auto_transcribe(source = source, - num_speakers = num_speakers, - translation = translate, - language = language) - - if isinstance(source, str): - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = True), - gr.update(visible = True)) - else: - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Transcribe': - - out = pipe.transcribe(source = source, - translation = translate, - language = language) - - return (gr.update(value = out, visible = True), - gr.update(value = None, visible = False), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Diarisation': - - out = pipe.perform_diarisation(source = source, - num_speakers = num_speakers) - - return (gr.update(value = None, visible = False), - gr.update(value = out, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - def annotate_output(annoation : str, out_json : dict): - # get *args which are not None - - trans = Transcript.from_json(out_json) - trans = trans.annotate(*annoation.split(",")) - - return gr.update(value = str(trans)),gr.update(value = trans.get_json()) - - # Create a thread to monitor user activity - def monitor_activity(model, pipe, timeout=timeout): - global USER_ACTIVE - - while True: - time.sleep(timeout) # Check user activity every second - with user_active_lock: - - if not USER_ACTIVE: - del model - del pipe - print("Model deleted empty memory") - gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.") - break - USER_ACTIVE = False - - # Start the monitoring thread - activity_thread = threading.Thread(target=monitor_activity, args=(model, pipe)) - activity_thread.daemon = True - activity_thread.start() - - with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: - - # Define components - hname = os.path.join(CURRENT_PATH, "header.html") - header = open(hname, "r").read() - - # ugly hack to get the logo to work - header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) - - gr.HTML(header, visible= True, show_label=False) - - with gr.Row(): - - with gr.Column(): - - task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", - value= 'Auto Transcribe') - - num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", - info = "Number of speakers in the audio file. If you don't know,\ - leave it at 0.", visible= True) - - translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, - info="Select 'Yes' to have the output translated into English.", - visible= True) - - language = gr.Dropdown(LANGUAGES, - label="Language (optional)", value = "None", - info="Language of the audio file. If you don't know,\ - leave it at None.", visible= True) - - input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" - ,"File or Files"], label="Input Type", value="Upload Audio") - - audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", - interactive= True, visible= True) - audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", - interactive= True, visible= False) - video1 = gr.Video(source="upload", type="filepath", label="Upload Video", - interactive= True, visible= False) - video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True, - interactive= True, visible= False) - file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) - - submit = gr.Button() - - with gr.Column(): - - out_txt = gr.Textbox(label="Output", - visible= True, show_copy_button=True) - - out_json = gr.JSON(label="JSON Output", - visible= False, show_copy_button=True) - - annoation = gr.Textbox(label="Name your speaker's", - info= "Please provide a list of the speakers arranged \ - in the order in which they appear in the input. Use comma ',' \ - as a seperator. Be aware that the first name is given \ - to SPEAKER_00 the second to SPEAKER_01 and so on.", - visible= False, interactive= True) - - annotate = gr.Button(value="Annotate", visible= False, interactive= True) - - # Define usage of components - input.change(fn=select_origin, inputs=[input], - outputs=[audio1, audio2, video1, video2, file_in]) - - task.change(fn=select_task, inputs=[task], - outputs=[num_speakers, translate, language]) - - translate.change(fn= lambda x : gr.update(value = x), - inputs=[translate], outputs=[translate]) - num_speakers.change(fn= lambda x : gr.update(value = x), - inputs=[num_speakers], outputs=[num_speakers]) - language.change(fn= lambda x : gr.update(value = x), - inputs=[language], outputs=[language]) - - submit.click(fn = run_scribe, - inputs=[task, num_speakers, translate, language, audio1, - audio2, video1, video2, file_in], - outputs=[out_txt, out_json, annoation, annotate]) - - annotate.click(fn = annotate_output, inputs=[annoation, out_json], - outputs=[out_txt, out_json]) - - return demo - - -if __name__ == "__main__": - - gradio_Interface().queue().launch() \ No newline at end of file diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py new file mode 100644 index 0000000..10659c0 --- /dev/null +++ b/scraibe/app/interactions.py @@ -0,0 +1,145 @@ +""" +This file contains ervery function that will be called when the user interacts with the +UI like pressing a button or uploading a file. +""" + +from math import pi +import gradio as gr +import scraibe.app.global_var as gv +from scraibe import Transcript + +def select_task(choice): + # tell the app that it is still in use + if choice == 'Auto Transcribe': + + return (gr.update(visible = True), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Transcribe': + + return (gr.update(visible = False), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Diarisation': + + return (gr.update(visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def select_origin(choice): + + # tell the app that it is still in use + if choice == "Upload Audio": + + return (gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Audio": + + return (gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Upload Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None)) + + elif choice == "File or Files": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True)) + +def run_scraibe(task, + num_speakers, + translate, + language, + audio1, + audio2, + video1, + video2, + file_in, + progress = gr.Progress(track_tqdm= True)): + + # get *args which are not None + + pipe = gv.MODEL + + progress(0, desc='Starting task...') + source = audio1 or audio2 or video1 or video2 or file_in + + if isinstance(source, list): + source = [s.name for s in source] + if len(source) == 1: + source = source[0] + + if task == 'Auto Transcribe': + + out_str , out_json = pipe.auto_transcribe(source = source, + num_speakers = num_speakers, + translation = translate, + language = language) + + if isinstance(source, str): + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = True), + gr.update(visible = True)) + else: + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = False), + gr.update(visible = False)) + + elif task == 'Transcribe': + + out = pipe.transcribe(source = source, + translation = translate, + language = language) + + return (gr.update(value = out, visible = True), + gr.update(value = None, visible = False), + gr.update(visible = False), + gr.update(visible = False)) + + elif task == 'Diarisation': + + out = pipe.perform_diarisation(source = source, + num_speakers = num_speakers) + + return (gr.update(value = None, visible = False), + gr.update(value = out, visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def annotate_output(annoation : str, out_json : dict): + # get *args which are not None + + trans = Transcript.from_json(out_json) + trans = trans.annotate(*annoation.split(",")) + + return gr.update(value = str(trans)),gr.update(value = trans.get_json()) + diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py new file mode 100644 index 0000000..ef9d818 --- /dev/null +++ b/scraibe/app/interface.py @@ -0,0 +1,129 @@ +""" +This file contains the actual gradio Interface which is used to interact with the user. +""" + +import gradio as gr +import os + +import scraibe.app.global_var as gv +from .interactions import * +from .stg import * + +from scraibe import Scraibe + +theme = gr.themes.Soft( + primary_hue="green", + secondary_hue='orange', + neutral_hue="gray", +) + + +LANGUAGES = [ + "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", + "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", + "Czech", "Danish", "Dutch", "English", "Estonian", + "Finnish", "French", "Galician", "German", "Greek", + "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", + "Italian", "Japanese", "Kannada", "Kazakh", "Korean", + "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", + "Maori", "Nepali", "Norwegian", "Persian", "Polish", + "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", + "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", + "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", + "Vietnamese", "Welsh" +] + +CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) + + +def gradio_Interface(pipe : Scraibe = None): + + if pipe is not None: + gv.MODEL = GradioTranscriptionInterface(pipe) + + with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: + + # Define components + hname = os.path.join(CURRENT_PATH, "header.html") + header = open(hname, "r").read() + + # ugly hack to get the logo to work + header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) + + gr.HTML(header, visible= True, show_label=False) + + with gr.Row(): + + with gr.Column(): + + task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", + value= 'Auto Transcribe') + + num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", + info = "Number of speakers in the audio file. If you don't know,\ + leave it at 0.", visible= True) + + translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, + info="Select 'Yes' to have the output translated into English.", + visible= True) + + language = gr.Dropdown(LANGUAGES, + label="Language (optional)", value = "None", + info="Language of the audio file. If you don't know,\ + leave it at None.", visible= True) + + input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" + ,"File or Files"], label="Input Type", value="Upload Audio") + + audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", + interactive= True, visible= True) + audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", + interactive= True, visible= False) + video1 = gr.Video(source="upload", type="filepath", label="Upload Video", + interactive= True, visible= False) + video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True, + interactive= True, visible= False) + file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) + + submit = gr.Button() + + with gr.Column(): + + out_txt = gr.Textbox(label="Output", + visible= True, show_copy_button=True) + + out_json = gr.JSON(label="JSON Output", + visible= False, show_copy_button=True) + + annoation = gr.Textbox(label="Name your speaker's", + info= "Please provide a list of the speakers arranged \ + in the order in which they appear in the input. Use comma ',' \ + as a seperator. Be aware that the first name is given \ + to SPEAKER_00 the second to SPEAKER_01 and so on.", + visible= False, interactive= True) + + annotate = gr.Button(value="Annotate", visible= False, interactive= True) + + # Define usage of components + input.change(fn=select_origin, inputs=[input], + outputs=[audio1, audio2, video1, video2, file_in]) + + task.change(fn=select_task, inputs=[task], + outputs=[num_speakers, translate, language]) + + translate.change(fn= lambda x : gr.update(value = x), + inputs=[translate], outputs=[translate]) + num_speakers.change(fn= lambda x : gr.update(value = x), + inputs=[num_speakers], outputs=[num_speakers]) + language.change(fn= lambda x : gr.update(value = x), + inputs=[language], outputs=[language]) + + submit.click(fn = run_scraibe, + inputs=[task, num_speakers, translate, language, audio1, + audio2, video1, video2, file_in], + outputs=[out_txt, out_json, annoation, annotate]) + + annotate.click(fn = annotate_output, inputs=[annoation, out_json], + outputs=[out_txt, out_json]) + + return demo \ No newline at end of file diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py new file mode 100644 index 0000000..9b227a1 --- /dev/null +++ b/scraibe/app/stg.py @@ -0,0 +1,157 @@ +""" +stg - scraibe to gradio interface + +This file contains the code for the scraibe to gradio interface. +It makes adds gradio interactions to the scraibe class in the back. + +""" + +import json +import gradio as gr +from tqdm import tqdm +from scraibe import Scraibe + + +class GradioTranscriptionInterface: + """ + Interface handling the interaction between Gradio UI and the Audio Transcription system. + """ + + def __init__(self, model: Scraibe): + """ + Initializes the GradioTranscriptionInterface with a transcription model. + + Args: + model (Scraibe): Model responsible for audio transcription tasks. + """ + self.model = model + + def auto_transcribe(self, source, + num_speakers : int, + translation : bool, + language : str): + """ + Shortcut method for the Scraibe task. + + Returns: + tuple: Transcribed text (str), JSON output (dict) + """ + + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + "language": language if language != "None" else None, + "task": 'translate' if translation else None + } + if isinstance(source, str): + try: + result = self.model.autotranscribe(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + + return str(result), result.get_json() + + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + try: + res = self.model.autotranscribe(s, **kwargs) + except ValueError: + _name = s.split("/")[-1] + res = f"NO TRANSCRIPT FOUND FOR {_name}" + gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") + result.append(res) + + out = '' + out_dict = {} + for i, r in enumerate(result): + out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" + out += str(r) + out += "\n\n" + + if isinstance(r, str): + out_dict[source_names[i]] = r + else: + out_dict[source_names[i]] = r.get_dict() + + return out, json.dumps(out_dict, indent=4) + + else: + raise gr.Error("Please provide a valid audio file.") + + + def transcribe(self, source, translation, language): + """ + Shortcut method for the Transcribe task. + + Returns: + str: Transcribed text. + """ + kwargs = { + "language": language if language != "None" else None, + "task": 'translate' if translation == "Yes" else None + } + + if isinstance(source, str): + result = self.model.transcribe(source, **kwargs) + + return str(result) + + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + res = self.model.transcribe(s, **kwargs) + result.append(res) + + out = '' + for i, res in enumerate(result): + out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" + out += str(res) + out += "\n\n" + + return out + + else: + raise gr.Error("Please provide a valid audio file.") + + def perform_diarisation(self, source, num_speakers): + """ + Shortcut method for the Diarisation task. + + Returns: + str: JSON output of diarisation result. + """ + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + } + + if isinstance(source, str): + try: + result = self.model.diarization(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + + return json.dumps(result, indent=2) + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Performing diarisation"): + try: + res = self.model.diarization(s, **kwargs) + except ValueError: + res = f"NO DIARISATION FOUND FOR {s}" + gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") + result.append(res) + + out = {} + + for i, res in enumerate(result): + out[source_names[i]] = res + + return json.dumps(out, indent=4) + + else: + gr.Error("Please provide a valid audio file.")