rework structure of gradio app

2023-11-20 15:01:51 +01:00
parent f691790c00
commit bbb2c848e3
8 changed files with 484 additions and 507 deletions
@@ -7,8 +7,7 @@ from .diarisation import *
 from .version import get_version as _get_version
 from .misc import *
-from .app.gradio_app import *
+from .app import *
 from .app.qtfaststart import *
 from .cli import *
@@ -1,2 +1,7 @@
 from .qtfaststart import *
-from .gradio_app import *
+from .activity_tracker import *
 from .interface import *
 from .stg import *
 from .interactions import *
 from .global_var import *
 from .app import *
@@ -0,0 +1,37 @@
 """
 This file contains the functions which are related to monitoring the actual app usage. 
 Therefore, the app is to be more efficient in the usage of the resources. 
 By for example, unloading or reloading the model.
 """
 import time
 import threading
 import torch
 import gc
 import gradio as gr
 # Default number of seconds between activity checks; also used as the default
 # value of monitor_activity()'s ``timeout`` parameter.
 timeout = 30 #seconds
 # Activity flag that monitor_activity() reads and clears.
 # NOTE(review): global_var.py defines its own USER_ACTIVE; nothing visible
 # keeps the two flags in sync - confirm which one is authoritative.
 USER_ACTIVE = True
 # Held by monitor_activity() while it inspects and updates USER_ACTIVE.
 user_active_lock = threading.Lock() # dummy for now
 # Polling loop meant to be run as the target of a background thread.
 def monitor_activity(model, pipe, timeout=timeout):
    """
    Poll the USER_ACTIVE flag and release the model once the app is idle.

    Every ``timeout`` seconds the flag is inspected under user_active_lock.
    If it is still set, it is cleared and the loop keeps polling. If it is
    already cleared, the references are dropped, garbage collection and a CUDA
    cache flush are triggered, a message is printed, a Gradio warning is
    issued, and the function returns.

    NOTE(review): ``del`` only removes this function's own references to
    ``model`` and ``pipe``; objects still referenced elsewhere stay alive.

    Args:
        model: Object whose local reference is dropped when idle.
        pipe: Object whose local reference is dropped when idle.
        timeout: Seconds to sleep between two checks.
    """
    global USER_ACTIVE
    idle = False
    while not idle:
        # Sleep for the full interval before each check.
        time.sleep(timeout)
        with user_active_lock:
            idle = not USER_ACTIVE
            if idle:
                del model, pipe
                gc.collect()
                torch.cuda.empty_cache()
                print("Model deleted empty memory")
                gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.")
            else:
                # Clear the flag; it has to be set again before the next check.
                USER_ACTIVE = False
@@ -0,0 +1,9 @@
 """
 Stores global variables for the app.
 """
 # Shared handle to the loaded pipeline. gradio_Interface() stores a
 # GradioTranscriptionInterface here and run_scraibe() reads it on every
 # request; it stays None until something assigns it.
 MODEL = None
 # Global variable to track user activity.
 # NOTE(review): no visible code reads or writes this flag, and
 # activity_tracker.py keeps its own USER_ACTIVE - confirm it is still needed.
 USER_ACTIVE = False
@@ -1,504 +0,0 @@
 """
 Gradio Audio Transcription App.
 --------------------------------
 This module provides an interface to transcribe audio files using the 
 Scraibe model. Users can either upload an audio file or record their speech 
 live for transcription. The application supports multiple languages and provides 
 options to specify the number of speakers and the language of the audio.
 Attributes:
    LANGUAGES (list): A list of supported languages for transcription.
 Usage:
    Run this script to start the Gradio web interface for audio transcription.
 """
 """
 Gradio Audio Transcription App.
 --------------------------------
 This module provides an interface to transcribe audio files using the 
 Scraibe model. Users can either upload an audio file or record their speech 
 live for transcription. The application supports multiple languages and provides 
 options to specify the number of speakers and the language of the audio.
 Attributes:
    LANGUAGES (list): A list of supported languages for transcription.
 Usage:
    Run this script to start the Gradio web interface for audio transcription.
 """
 import json
 from math import pi
 import os
 import gradio as gr
 import threading
 from tqdm import tqdm
 import time
 from scraibe import Scraibe, Transcript
 # Gradio "Soft" theme with green / orange / gray hues, passed to gr.Blocks.
 theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue='orange',
    neutral_hue="gray",  
 )
 # Language names offered as choices of the "Language (optional)" dropdown.
 LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
    "Czech", "Danish", "Dutch", "English", "Estonian",
    "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
    "Italian", "Japanese", "Kannada", "Kazakh", "Korean",
    "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
    "Maori", "Nepali", "Norwegian", "Persian", "Polish",
    "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
    "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
    "Vietnamese", "Welsh"
 ]
 # Directory of this module; used to locate header.html and logo.svg.
 CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
 # Global variable to track user activity: set by reset_user_activity() and
 # cleared by the monitor thread started in gradio_Interface().
 USER_ACTIVE = True
 # Lock to synchronize access to the USER_ACTIVE variable
 user_active_lock = threading.Lock()
 # Marks the app as being in use.
 def reset_user_activity():
    """
    Set the module-level USER_ACTIVE flag to True while holding
    user_active_lock.
    """
    global USER_ACTIVE
    user_active_lock.acquire()
    try:
        USER_ACTIVE = True
    finally:
        # Always hand the lock back, mirroring a ``with`` block.
        user_active_lock.release()
 class GradioTranscriptionInterface:
    """
    Interface handling the interaction between Gradio UI and the Audio Transcription system.
    Each task method accepts either one file path (str) or a list of file
    paths and forwards the work to the wrapped model.
    """
    def __init__(self, model: Scraibe):
        """
        Initializes the GradioTranscriptionInterface with a transcription model.
        Args:
            model (Scraibe): Model responsible for audio transcription tasks.
        """
        self.model = model
    def auto_transcribe(self, source,
                        num_speakers : int,
                        translation : bool,
                        language : str):
        """
        Shortcut method for the Scraibe task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            num_speakers (int): Number of speakers; 0 is forwarded as None.
            translation (bool): Truthy to request the 'translate' task.
            language (str): Language name; the string "None" is forwarded as None.
        Returns:
            tuple: Transcribed text (str) and the JSON output. For a single
                file this is ``result.get_json()``; for a list it is a JSON
                string mapping each file name to its transcript dict, or to a
                placeholder text when no speech was found.
        Raises:
            gr.Error: If the model raises ValueError for a single file, or if
                ``source`` is neither a str nor a list.
        """
        kwargs = {
            "num_speakers": num_speakers if num_speakers != 0 else None,
            "language": language if language != "None" else None,
            "task": 'translate' if translation else None
        }
        if isinstance(source, str):
            try:
                result = self.model.autotranscribe(source, **kwargs)
            except ValueError:
                raise gr.Error("Couldn't detect any speech in the provided audio. \
                        Please try again!")
            return str(result), result.get_json()
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
                try:
                    res = self.model.autotranscribe(s, **kwargs)
                except ValueError:
                    # Keep going: one silent file must not abort the batch.
                    _name = s.split("/")[-1]
                    res = f"NO TRANSCRIPT FOUND FOR {_name}"
                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
                result.append(res)
            out = ''
            out_dict = {}
            for name, r in zip(source_names, result):
                out += f"TRANSCRIPT FOR {name}:\n\n"
                out += str(r)
                out += "\n\n"
                # Placeholder strings are stored as-is, transcripts as dicts.
                if isinstance(r, str):
                    out_dict[name] = r
                else:
                    out_dict[name] = r.get_dict()
            return out, json.dumps(out_dict, indent=4)
        else:
            raise gr.Error("Please provide a valid audio file.")
    def transcribe(self, source, translation, language):
        """
        Shortcut method for the Transcribe task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            translation (bool | str): True (checkbox value) or the legacy
                string "Yes" requests the 'translate' task.
            language (str): Language name; the string "None" is forwarded as None.
        Returns:
            str: Transcribed text.
        Raises:
            gr.Error: If ``source`` is neither a str nor a list.
        """
        # The UI passes the checkbox state as a bool; comparing only against
        # "Yes" meant translation was never requested for this task.
        wants_translation = translation is True or translation == "Yes"
        kwargs = {
            "language": language if language != "None" else None,
            "task": 'translate' if wants_translation else None
        }
        if isinstance(source, str):
            result = self.model.transcribe(source, **kwargs)
            return str(result)
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
                res = self.model.transcribe(s, **kwargs)
                result.append(res)
            out = ''
            for name, res in zip(source_names, result):
                out += f"TRANSCRIPT FOR {name}:\n\n"
                out += str(res)
                out += "\n\n"
            return out
        else:
            raise gr.Error("Please provide a valid audio file.")
    def perform_diarisation(self, source, num_speakers):
        """
        Shortcut method for the Diarisation task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            num_speakers (int): Number of speakers; 0 is forwarded as None.
        Returns:
            str: JSON output of diarisation result. For a list of files the
                JSON maps each file name to its result, or to a placeholder
                text when no speech was found.
        Raises:
            gr.Error: If the model raises ValueError for a single file, or if
                ``source`` is neither a str nor a list.
        """
        kwargs = {
            "num_speakers": num_speakers if num_speakers != 0 else None,
        }
        if isinstance(source, str):
            try:
                result = self.model.diarization(source, **kwargs)
            except ValueError:
                raise gr.Error("Couldn't detect any speech in the provided audio. \
                        Please try again!")
            return json.dumps(result, indent=2)
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
                try:
                    res = self.model.diarization(s, **kwargs)
                except ValueError:
                    # Report the file name, as auto_transcribe does, rather
                    # than the full path of the uploaded file.
                    _name = s.split("/")[-1]
                    res = f"NO DIARISATION FOUND FOR {_name}"
                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
                result.append(res)
            out = {}
            for name, res in zip(source_names, result):
                out[name] = res
            return json.dumps(out, indent=4)
        else:
            # The error has to be raised; merely constructing it returned None.
            raise gr.Error("Please provide a valid audio file.")
 ####
 # Gradio Interface
 ####
 def gradio_Interface(model : Scraibe = None, timeout = 1):
    """
    Gradio Web interface for audio transcription.

    Builds the Blocks UI, wires the event handlers defined below and starts a
    daemon thread that watches the USER_ACTIVE flag.

    :param model: Scraibe model; when None a new ``Scraibe()`` is created
    :type model: Scraibe, optional
    :param timeout: Seconds the monitor thread sleeps between two activity
        checks. NOTE(review): the signature default is 1, although this
        docstring previously stated 600 seconds - confirm the intended value.
    :type timeout: int, optional
    :return: The ``gr.Blocks`` instance (not yet queued or launched)
    :rtype: gradio.Blocks
    """
    if model is None:
        model = Scraibe()
    # Kept so that run_scribe can construct a new Scraibe with the same params.
    save_model_params = model.params
    pipe = GradioTranscriptionInterface(model)
    # Shows/hides num_speakers, translate and language (in that order)
    # depending on the selected task.
    def select_task(choice):
        # tell the app that it is still in use
        reset_user_activity()
        if choice == 'Auto Transcribe':
            return (gr.update(visible = True),
                    gr.update(visible = True),
                    gr.update(visible = True))
        elif choice == 'Transcribe':
            return (gr.update(visible = False),
                    gr.update(visible = True),
                    gr.update(visible = True))
        elif choice == 'Diarisation':
            return (gr.update(visible = True),
                    gr.update(visible = False),
                    gr.update(visible = False))
    # Shows the input widget matching the selected input type and hides and
    # clears the other four (order: audio1, audio2, video1, video2, file_in).
    def select_origin(choice):
        # tell the app that it is still in use
        reset_user_activity()
        if choice == "Upload Audio":
            return (gr.update(visible = True),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None))
        elif choice == "Record Audio":
            return (gr.update(visible = False, value = None),
                    gr.update(visible = True),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None))
        elif choice == "Upload Video":
            return (gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = True),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None))
        elif choice == "Record Video":
            return (gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = True),
                    gr.update(visible = False, value = None))
        elif choice == "File or Files":
            return (gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = False, value = None),
                    gr.update(visible = True))
    # Submit handler: runs the selected task on the first non-empty input and
    # returns updates for out_txt, out_json, annoation and annotate.
    def run_scribe(task,
                   num_speakers,
                   translate,
                   language,
                   audio1,
                   audio2,
                   video1,
                   video2,
                   file_in,
                   progress = gr.Progress(track_tqdm= True)):
        # NOTE(review): ``model`` and ``pipe`` are assigned in the branch
        # below, which makes them local to run_scribe. "model" is therefore
        # never in locals() at this point, so the branch runs - and a new
        # Scraibe is built - on every call. Confirm whether that is intended.
        if not "model" in locals():
            gr.Warning("Model unloaded due to inactivity. Reloading the model, please wait.")
            model = Scraibe(**save_model_params)
            pipe = GradioTranscriptionInterface(model)
        # # tell the app that it is still in use
        reset_user_activity()
        progress(0, desc='Starting task...')
        # First truthy input wins.
        source = audio1 or audio2 or video1 or video2 or file_in
        if isinstance(source, list):
            # Uploaded file objects are reduced to their ``name`` attribute;
            # a single upload is treated like a plain path.
            source = [s.name for s in source]
            if len(source) == 1:
                source = source[0]
        if task == 'Auto Transcribe':
            out_str , out_json = pipe.auto_transcribe(source = source,
                                num_speakers = num_speakers,
                                translation = translate,
                                language = language)
            # The annotation widgets are only shown for a single file.
            if isinstance(source, str):
                return (gr.update(value = out_str, visible = True),
                        gr.update(value = out_json, visible = True),
                        gr.update(visible = True),
                        gr.update(visible = True))      
            else:
                return (gr.update(value = out_str, visible = True),
                        gr.update(value = out_json, visible = True),
                        gr.update(visible = False),
                        gr.update(visible = False))  
        elif task == 'Transcribe':
            out = pipe.transcribe(source = source,
                                translation = translate,
                                language = language)
            return (gr.update(value = out, visible = True),
                    gr.update(value = None, visible = False),
                    gr.update(visible = False),
                    gr.update(visible = False))
        elif task == 'Diarisation':
            out = pipe.perform_diarisation(source = source,
                                num_speakers = num_speakers)
            return (gr.update(value = None, visible = False),
                    gr.update(value = out, visible = True),
                    gr.update(visible = False),
                    gr.update(visible = False))
    # Annotate handler: rebuilds the transcript from the JSON output and
    # applies the comma-separated speaker names.
    def annotate_output(annoation : str, out_json : dict):
        trans = Transcript.from_json(out_json)
        trans = trans.annotate(*annoation.split(","))
        return gr.update(value = str(trans)),gr.update(value = trans.get_json())
    # Thread target that polls the activity flag.
    def monitor_activity(model, pipe, timeout=timeout):
        global USER_ACTIVE
        while True:
            time.sleep(timeout)  # Wait ``timeout`` seconds between checks
            with user_active_lock:
                if not USER_ACTIVE:
                    # NOTE(review): these ``del`` statements only drop this
                    # function's own references; the enclosing scope still
                    # holds ``model`` and ``pipe``.
                    del model
                    del pipe
                    print("Model deleted empty memory")
                    gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.")
                    break
                # Clear the flag; reset_user_activity() sets it again.
                USER_ACTIVE = False 
    # Start the monitoring thread
    activity_thread = threading.Thread(target=monitor_activity, args=(model, pipe))
    activity_thread.daemon = True
    activity_thread.start()
    with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:
        # Define components
        hname = os.path.join(CURRENT_PATH, "header.html")
        # NOTE(review): the file object is never closed explicitly.
        header = open(hname, "r").read()
        # ugly hack to get the logo to work
        header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" )
        gr.HTML(header, visible= True, show_label=False)
        with gr.Row():
            # Left column: task options and input widgets.
            with gr.Column():
                task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
                                value= 'Auto Transcribe')
                num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", 
                                info = "Number of speakers in the audio file. If you don't know,\
                                    leave it at 0.", visible= True)
                translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
                                info="Select 'Yes' to have the output translated into English.",
                                visible= True)
                language = gr.Dropdown(LANGUAGES,
                                label="Language (optional)", value = "None",
                                info="Language of the audio file. If you don't know,\
                                    leave it at None.", visible= True)
                input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" 
                                    ,"File or Files"], label="Input Type", value="Upload Audio")
                audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
                                    interactive= True, visible= True)
                audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
                                    interactive= True, visible= False)
                video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
                                    interactive= True, visible= False)
                video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True,
                                    interactive= True, visible= False)
                file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
                submit = gr.Button()
            # Right column: results and speaker annotation.
            with gr.Column():
                out_txt = gr.Textbox(label="Output",
                                        visible= True, show_copy_button=True)
                out_json = gr.JSON(label="JSON Output",
                                    visible= False, show_copy_button=True)
                annoation = gr.Textbox(label="Name your speaker's",
                                    info= "Please provide a list of the speakers arranged \
                                    in the order in which they appear in the input. Use comma ',' \
                                    as a seperator. Be aware that the first name is given \
                                        to SPEAKER_00 the second to SPEAKER_01 and so on.",
                                    visible= False, interactive= True)
                annotate = gr.Button(value="Annotate", visible= False, interactive= True)
        # Define usage of components
        input.change(fn=select_origin, inputs=[input],
                        outputs=[audio1, audio2, video1, video2, file_in])
        task.change(fn=select_task, inputs=[task],
                    outputs=[num_speakers, translate, language])
        # The next three handlers write each widget's value back to itself.
        translate.change(fn= lambda x : gr.update(value = x),
                            inputs=[translate], outputs=[translate])
        num_speakers.change(fn= lambda x : gr.update(value = x),
                            inputs=[num_speakers], outputs=[num_speakers])
        language.change(fn= lambda x : gr.update(value = x), 
                        inputs=[language], outputs=[language])
        submit.click(fn = run_scribe, 
                        inputs=[task, num_speakers, translate, language, audio1,
                                audio2, video1, video2, file_in],
                        outputs=[out_txt, out_json, annoation, annotate])
        annotate.click(fn = annotate_output, inputs=[annoation, out_json],
                        outputs=[out_txt, out_json])
    return demo
 if __name__ == "__main__":
    gradio_Interface().queue().launch()
@@ -0,0 +1,145 @@
 """
 This file contains ervery function that will be called when the user interacts with the 
 UI like pressing a button or uploading a file.
 """
 from math import pi
 import gradio as gr 
 import scraibe.app.global_var as gv
 from scraibe import Transcript
 def select_task(choice):
    """
    Return visibility updates for the task-specific option widgets.

    The three updates are wired, in order, to the number-of-speakers,
    translation and language components.

    Args:
        choice: Label of the selected task.
    Returns:
        tuple of three ``gr.update`` results, or None for an unknown label.
    """
    visibility_by_task = (
        ('Auto Transcribe', (True, True, True)),
        ('Transcribe', (False, True, True)),
        ('Diarisation', (True, False, False)),
    )
    for label, flags in visibility_by_task:
        if choice == label:
            return tuple(gr.update(visible = flag) for flag in flags)
    return None
 def select_origin(choice):
    """
    Show the input widget that matches the selected input type.

    The five updates are wired, in order, to the upload-audio, record-audio,
    upload-video, record-video and file-upload components. The selected one is
    made visible; every other one is hidden and its value is cleared.

    Args:
        choice: Label of the selected input type.
    Returns:
        tuple of five ``gr.update`` results, or None for an unknown label.
    """
    origins = ("Upload Audio", "Record Audio", "Upload Video",
               "Record Video", "File or Files")
    for selected, label in enumerate(origins):
        if choice == label:
            return tuple(
                gr.update(visible = True) if position == selected
                else gr.update(visible = False, value = None)
                for position in range(len(origins))
            )
    return None
 def run_scraibe(task,
                num_speakers,
                translate,
                language,
                audio1,
                audio2,
                video1,
                video2,
                file_in,
                progress = gr.Progress(track_tqdm= True)):
    """
    Run the selected task on the provided input and return UI updates.

    Args:
        task: 'Auto Transcribe', 'Transcribe' or 'Diarisation'.
        num_speakers: Number of speakers, forwarded to the pipeline.
        translate: Translation flag, forwarded to the pipeline.
        language: Language name, forwarded to the pipeline.
        audio1, audio2, video1, video2, file_in: Values of the five input
            widgets; the first truthy one is used as the source.
        progress: Gradio progress tracker.
    Returns:
        tuple of four ``gr.update`` results for the text output, the JSON
        output, the annotation textbox and the annotate button, or None when
        ``task`` is not one of the three known labels.
    Raises:
        gr.Error: If no pipeline has been stored in ``gv.MODEL``.
    """
    pipe = gv.MODEL
    # gv.MODEL starts as None; fail with a readable message instead of an
    # AttributeError on the first pipeline call.
    if pipe is None:
        raise gr.Error("No model is loaded. Please load the model and try again.")
    progress(0, desc='Starting task...')
    # First truthy input wins.
    source = audio1 or audio2 or video1 or video2 or file_in
    if isinstance(source, list):
        # Uploaded file objects are reduced to their ``name`` attribute; a
        # single upload is treated like a plain path.
        source = [s.name for s in source]
        if len(source) == 1:
            source = source[0]
    if task == 'Auto Transcribe':
        out_str , out_json = pipe.auto_transcribe(source = source,
                            num_speakers = num_speakers,
                            translation = translate,
                            language = language)
        # Speaker annotation is only offered for a single file.
        can_annotate = isinstance(source, str)
        return (gr.update(value = out_str, visible = True),
                gr.update(value = out_json, visible = True),
                gr.update(visible = can_annotate),
                gr.update(visible = can_annotate))
    elif task == 'Transcribe':
        out = pipe.transcribe(source = source,
                            translation = translate,
                            language = language)
        return (gr.update(value = out, visible = True),
                gr.update(value = None, visible = False),
                gr.update(visible = False),
                gr.update(visible = False))
    elif task == 'Diarisation':
        out = pipe.perform_diarisation(source = source,
                            num_speakers = num_speakers)
        return (gr.update(value = None, visible = False),
                gr.update(value = out, visible = True),
                gr.update(visible = False),
                gr.update(visible = False))
 def annotate_output(annoation : str, out_json : dict):
    """
    Apply user-provided speaker names to a transcript.

    Args:
        annoation: Comma-separated speaker names, in the order in which the
            speakers should be named.
        out_json: JSON output of a previous transcription, passed to
            ``Transcript.from_json``.
    Returns:
        tuple of two ``gr.update`` results carrying the annotated transcript
        as text and as JSON.
    """
    # Strip surrounding whitespace so "Alice, Bob" does not yield " Bob".
    names = [name.strip() for name in annoation.split(",")]
    trans = Transcript.from_json(out_json)
    trans = trans.annotate(*names)
    return gr.update(value = str(trans)),gr.update(value = trans.get_json())
@@ -0,0 +1,129 @@
 """
 This file contains the actual gradio Interface which is used to interact with the user.
 """
 import gradio as gr
 import os
 import scraibe.app.global_var as gv
 from .interactions import *
 from .stg import *
 from scraibe import Scraibe
 # Gradio "Soft" theme with green / orange / gray hues, passed to gr.Blocks.
 theme = gr.themes.Soft(
    primary_hue="green",
    secondary_hue='orange',
    neutral_hue="gray",  
 )
 # Language names offered as choices of the "Language (optional)" dropdown.
 LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
    "Czech", "Danish", "Dutch", "English", "Estonian",
    "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
    "Italian", "Japanese", "Kannada", "Kazakh", "Korean",
    "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
    "Maori", "Nepali", "Norwegian", "Persian", "Polish",
    "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
    "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
    "Vietnamese", "Welsh"
 ]
 # Directory of this module; used to locate header.html and logo.svg.
 CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
 def gradio_Interface(pipe : Scraibe = None):
    """
    Build the Gradio Blocks interface for audio transcription.

    Args:
        pipe: Optional Scraibe model. When given, it is wrapped in a
            GradioTranscriptionInterface and stored in ``gv.MODEL``; when
            None, ``gv.MODEL`` is left untouched.
    Returns:
        The ``gr.Blocks`` instance (not yet queued or launched).
    """
    if pipe is not None:
        gv.MODEL = GradioTranscriptionInterface(pipe)
    with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:
            # Define components
            hname = os.path.join(CURRENT_PATH, "header.html")
            # Read through a context manager so the file handle is closed.
            with open(hname, "r") as header_file:
                header = header_file.read()
            # ugly hack to get the logo to work
            header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" )
            gr.HTML(header, visible= True, show_label=False)
            with gr.Row():
                # Left column: task options and input widgets.
                with gr.Column():
                    task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
                                    value= 'Auto Transcribe')
                    num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", 
                                    info = "Number of speakers in the audio file. If you don't know,\
                                        leave it at 0.", visible= True)
                    translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
                                    info="Select 'Yes' to have the output translated into English.",
                                    visible= True)
                    # NOTE(review): the default "None" is not one of the
                    # LANGUAGES choices - confirm the installed gradio version
                    # accepts a value outside the choice list.
                    language = gr.Dropdown(LANGUAGES,
                                    label="Language (optional)", value = "None",
                                    info="Language of the audio file. If you don't know,\
                                        leave it at None.", visible= True)
                    # Named input_type so the builtin ``input`` is not shadowed.
                    input_type = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" 
                                        ,"File or Files"], label="Input Type", value="Upload Audio")
                    audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
                                        interactive= True, visible= True)
                    audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
                                        interactive= True, visible= False)
                    video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
                                        interactive= True, visible= False)
                    video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True,
                                        interactive= True, visible= False)
                    file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
                    submit = gr.Button()
                # Right column: results and speaker annotation.
                with gr.Column():
                    out_txt = gr.Textbox(label="Output",
                                            visible= True, show_copy_button=True)
                    out_json = gr.JSON(label="JSON Output",
                                        visible= False, show_copy_button=True)
                    annoation = gr.Textbox(label="Name your speaker's",
                                        info= "Please provide a list of the speakers arranged \
                                        in the order in which they appear in the input. Use comma ',' \
                                        as a seperator. Be aware that the first name is given \
                                            to SPEAKER_00 the second to SPEAKER_01 and so on.",
                                        visible= False, interactive= True)
                    annotate = gr.Button(value="Annotate", visible= False, interactive= True)
            # Define usage of components
            input_type.change(fn=select_origin, inputs=[input_type],
                            outputs=[audio1, audio2, video1, video2, file_in])
            task.change(fn=select_task, inputs=[task],
                        outputs=[num_speakers, translate, language])
            # The next three handlers write each widget's value back to itself.
            translate.change(fn= lambda x : gr.update(value = x),
                                inputs=[translate], outputs=[translate])
            num_speakers.change(fn= lambda x : gr.update(value = x),
                                inputs=[num_speakers], outputs=[num_speakers])
            language.change(fn= lambda x : gr.update(value = x), 
                            inputs=[language], outputs=[language])
            submit.click(fn = run_scraibe, 
                            inputs=[task, num_speakers, translate, language, audio1,
                                    audio2, video1, video2, file_in],
                            outputs=[out_txt, out_json, annoation, annotate])
            annotate.click(fn = annotate_output, inputs=[annoation, out_json],
                            outputs=[out_txt, out_json])
    return demo
@@ -0,0 +1,157 @@
 """
 stg - scraibe to gradio interface
 This file contains the code for the scraibe to gradio interface.
 It makes adds gradio interactions to the scraibe class in the back.
 """
 import json
 import gradio as gr
 from tqdm import tqdm
 from scraibe import Scraibe
 class GradioTranscriptionInterface:
    """
    Interface handling the interaction between Gradio UI and the Audio Transcription system.
    Each task method accepts either one file path (str) or a list of file
    paths and forwards the work to the wrapped model.
    """
    def __init__(self, model: Scraibe):
        """
        Initializes the GradioTranscriptionInterface with a transcription model.
        Args:
            model (Scraibe): Model responsible for audio transcription tasks.
        """
        self.model = model
    def auto_transcribe(self, source,
                        num_speakers : int,
                        translation : bool,
                        language : str):
        """
        Shortcut method for the Scraibe task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            num_speakers (int): Number of speakers; 0 is forwarded as None.
            translation (bool): Truthy to request the 'translate' task.
            language (str): Language name; the string "None" is forwarded as None.
        Returns:
            tuple: Transcribed text (str) and the JSON output. For a single
                file this is ``result.get_json()``; for a list it is a JSON
                string mapping each file name to its transcript dict, or to a
                placeholder text when no speech was found.
        Raises:
            gr.Error: If the model raises ValueError for a single file, or if
                ``source`` is neither a str nor a list.
        """
        kwargs = {
            "num_speakers": num_speakers if num_speakers != 0 else None,
            "language": language if language != "None" else None,
            "task": 'translate' if translation else None
        }
        if isinstance(source, str):
            try:
                result = self.model.autotranscribe(source, **kwargs)
            except ValueError:
                raise gr.Error("Couldn't detect any speech in the provided audio. \
                        Please try again!")
            return str(result), result.get_json()
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
                try:
                    res = self.model.autotranscribe(s, **kwargs)
                except ValueError:
                    # Keep going: one silent file must not abort the batch.
                    _name = s.split("/")[-1]
                    res = f"NO TRANSCRIPT FOUND FOR {_name}"
                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
                result.append(res)
            out = ''
            out_dict = {}
            for name, r in zip(source_names, result):
                out += f"TRANSCRIPT FOR {name}:\n\n"
                out += str(r)
                out += "\n\n"
                # Placeholder strings are stored as-is, transcripts as dicts.
                if isinstance(r, str):
                    out_dict[name] = r
                else:
                    out_dict[name] = r.get_dict()
            return out, json.dumps(out_dict, indent=4)
        else:
            raise gr.Error("Please provide a valid audio file.")
    def transcribe(self, source, translation, language):
        """
        Shortcut method for the Transcribe task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            translation (bool | str): True (checkbox value) or the legacy
                string "Yes" requests the 'translate' task.
            language (str): Language name; the string "None" is forwarded as None.
        Returns:
            str: Transcribed text.
        Raises:
            gr.Error: If ``source`` is neither a str nor a list.
        """
        # The UI passes the checkbox state as a bool; comparing only against
        # "Yes" meant translation was never requested for this task.
        wants_translation = translation is True or translation == "Yes"
        kwargs = {
            "language": language if language != "None" else None,
            "task": 'translate' if wants_translation else None
        }
        if isinstance(source, str):
            result = self.model.transcribe(source, **kwargs)
            return str(result)
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
                res = self.model.transcribe(s, **kwargs)
                result.append(res)
            out = ''
            for name, res in zip(source_names, result):
                out += f"TRANSCRIPT FOR {name}:\n\n"
                out += str(res)
                out += "\n\n"
            return out
        else:
            raise gr.Error("Please provide a valid audio file.")
    def perform_diarisation(self, source, num_speakers):
        """
        Shortcut method for the Diarisation task.
        Args:
            source (str | list): Path of one file, or a list of file paths.
            num_speakers (int): Number of speakers; 0 is forwarded as None.
        Returns:
            str: JSON output of diarisation result. For a list of files the
                JSON maps each file name to its result, or to a placeholder
                text when no speech was found.
        Raises:
            gr.Error: If the model raises ValueError for a single file, or if
                ``source`` is neither a str nor a list.
        """
        kwargs = {
            "num_speakers": num_speakers if num_speakers != 0 else None,
        }
        if isinstance(source, str):
            try:
                result = self.model.diarization(source, **kwargs)
            except ValueError:
                raise gr.Error("Couldn't detect any speech in the provided audio. \
                        Please try again!")
            return json.dumps(result, indent=2)
        elif isinstance(source, list):
            source_names = [s.split("/")[-1] for s in source]
            result = []
            for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
                try:
                    res = self.model.diarization(s, **kwargs)
                except ValueError:
                    # Report the file name, as auto_transcribe does, rather
                    # than the full path of the uploaded file.
                    _name = s.split("/")[-1]
                    res = f"NO DIARISATION FOUND FOR {_name}"
                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
                result.append(res)
            out = {}
            for name, res in zip(source_names, result):
                out[name] = res
            return json.dumps(out, indent=4)
        else:
            # The error has to be raised; merely constructing it returned None.
            raise gr.Error("Please provide a valid audio file.")