rework structure of gradio app

2023-11-20 15:01:51 +01:00
parent f691790c00
commit bbb2c848e3
8 changed files with 484 additions and 507 deletions
@@ -1,2 +1,7 @@
 from .qtfaststart import *
-from .gradio_app import *
+from .activity_tracker import *
+from .interface import *
+from .stg import *
+from .interactions import *
+from .global_var import *
+from .app import *
@@ -0,0 +1,37 @@
+"""
+This file contains the functions related to monitoring the actual app usage.
+This allows the app to use its resources more efficiently,
+for example by unloading or reloading the model.
+"""
+import time
+import threading
+import torch
+import gc
+import gradio as gr
+
+
+# Default interval, in seconds, between two inactivity checks of monitor_activity.
+timeout = 30 #seconds
+# Activity flag read and cleared by monitor_activity; starts out as "active".
+USER_ACTIVE = True
+# Lock held by monitor_activity while it reads and clears USER_ACTIVE.
+user_active_lock = threading.Lock() # dummy for now
+
+# Loop that monitors user activity; presumably meant to be run as the target of a thread
+def monitor_activity(model, pipe, timeout=timeout):
+    """
+    Poll the module-level USER_ACTIVE flag and stop once an interval passes without activity.
+
+    Every `timeout` seconds the flag is inspected while holding user_active_lock.
+    If it is False, the local references to `model` and `pipe` are deleted,
+    gc.collect() and torch.cuda.empty_cache() are called and the loop ends.
+    Otherwise the flag is reset to False, so it has to be set to True again
+    before the next check for the loop to keep running.
+
+    Args:
+        model: Object whose local reference is deleted on inactivity.
+        pipe: Object whose local reference is deleted on inactivity.
+        timeout: Seconds to sleep between two checks; defaults to the module-level `timeout`.
+    """
+    global USER_ACTIVE
+    
+    while True:
+        time.sleep(timeout)  # Wait one full `timeout` interval between two checks
+        with user_active_lock:
+            
+            if not USER_ACTIVE:
+                # NOTE(review): `del` only unbinds this function's local names. Any
+                # reference held elsewhere (e.g. by the caller) keeps the objects
+                # alive, so memory may not actually be released - TODO confirm.
+                del model
+                del pipe
+                
+                gc.collect()
+                torch.cuda.empty_cache()
+                
+                
+                
+                print("Model deleted empty memory")
+                # NOTE(review): this is called from the monitoring loop, not from a
+                # Gradio event handler - verify that the warning reaches the user.
+                gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.")
+                break
+            USER_ACTIVE = False 
@@ -0,0 +1,9 @@
+"""
+Stores global variables for the app.
+"""
+
+# Global variable to store the model.
+# gradio_Interface assigns a GradioTranscriptionInterface wrapper here and
+# run_scraibe reads it; the value is None until such an assignment happens.
+MODEL = None
+
+# Global variable to track user activity
+# NOTE(review): activity_tracker defines its own USER_ACTIVE flag - confirm which of the two is authoritative.
+USER_ACTIVE = False
@@ -1,504 +0,0 @@
-"""
-Gradio Audio Transcription App.
--------------------------------
-
-This module provides an interface to transcribe audio files using the 
-Scraibe model. Users can either upload an audio file or record their speech 
-live for transcription. The application supports multiple languages and provides 
-options to specify the number of speakers and the language of the audio.
-
-Attributes:
-    LANGUAGES (list): A list of supported languages for transcription.
-
-Usage:
-    Run this script to start the Gradio web interface for audio transcription.
-    
-"""
-
-"""
-Gradio Audio Transcription App.
--------------------------------
-
-This module provides an interface to transcribe audio files using the 
-Scraibe model. Users can either upload an audio file or record their speech 
-live for transcription. The application supports multiple languages and provides 
-options to specify the number of speakers and the language of the audio.
-
-Attributes:
-    LANGUAGES (list): A list of supported languages for transcription.
-
-Usage:
-    Run this script to start the Gradio web interface for audio transcription.
-    
-"""
-
-
-import json
-from math import pi
-import os
-
-import gradio as gr
-import threading
-from tqdm import tqdm
-
-import time
-from scraibe import Scraibe, Transcript
-
-theme = gr.themes.Soft(
-    primary_hue="green",
-    secondary_hue='orange',
-    neutral_hue="gray",  
-)
-
-LANGUAGES = [
-    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
-    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
-    "Czech", "Danish", "Dutch", "English", "Estonian",
-    "Finnish", "French", "Galician", "German", "Greek",
-    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
-    "Italian", "Japanese", "Kannada", "Kazakh", "Korean",
-    "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
-    "Maori", "Nepali", "Norwegian", "Persian", "Polish",
-    "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
-    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
-    "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
-    "Vietnamese", "Welsh"
-]
-
-CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
-
-
-# Global variable to track user activity
-USER_ACTIVE = True
-
-# Lock to synchronize access to user_active variable
-user_active_lock = threading.Lock()
-
-# Function to reset the user activity flag
-def reset_user_activity():
-    global USER_ACTIVE
-    with user_active_lock:
-        USER_ACTIVE = True
-
-class GradioTranscriptionInterface:
-    """
-    Interface handling the interaction between Gradio UI and the Audio Transcription system.
-    """
-
-    def __init__(self, model: Scraibe):
-        """
-        Initializes the GradioTranscriptionInterface with a transcription model.
-
-        Args:
-            model (Scraibe): Model responsible for audio transcription tasks.
-        """
-        self.model = model
-
-    def auto_transcribe(self, source,
-                        num_speakers : int,
-                        translation : bool,
-                        language : str):
-        """
-        Shortcut method for the Scraibe task.
-
-        Returns:
-            tuple: Transcribed text (str), JSON output (dict)
-        """
-        
-        kwargs = {
-            "num_speakers": num_speakers if num_speakers != 0 else None,
-            "language": language if language != "None" else None,
-            "task": 'translate' if translation else None
-        }
-        if isinstance(source, str):
-            try:
-                result = self.model.autotranscribe(source, **kwargs)
-            except ValueError:
-                raise gr.Error("Couldn't detect any speech in the provided audio. \
-                        Please try again!")
-            
-            return str(result), result.get_json()
-        
-        elif isinstance(source, list):
-            source_names = [s.split("/")[-1] for s in source]
-            result = []
-            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
-                try:
-                    res = self.model.autotranscribe(s, **kwargs)
-                except ValueError:
-                    _name = s.split("/")[-1]
-                    res = f"NO TRANSCRIPT FOUND FOR {_name}"
-                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
-                result.append(res)
-            
-            out = ''
-            out_dict = {}
-            for i, r in enumerate(result):
-                out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
-                out += str(r)
-                out += "\n\n"
-                
-                if isinstance(r, str):
-                    out_dict[source_names[i]] = r
-                else:
-                    out_dict[source_names[i]] = r.get_dict()
-              
-            return out, json.dumps(out_dict, indent=4)
-        
-        else:
-            raise gr.Error("Please provide a valid audio file.")
-
-
-    def transcribe(self, source, translation, language):
-        """
-        Shortcut method for the Transcribe task.
-
-        Returns:
-            str: Transcribed text.
-        """
-        kwargs = {
-            "language": language if language != "None" else None,
-            "task": 'translate' if translation == "Yes" else None
-        }
-        
-        if isinstance(source, str):
-            result = self.model.transcribe(source, **kwargs)
-            
-            return str(result)
-        
-        elif isinstance(source, list):
-            source_names = [s.split("/")[-1] for s in source]
-            result = []
-            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
-                res = self.model.transcribe(s, **kwargs)
-                result.append(res)
-            
-            out = ''
-            for i, res in enumerate(result):
-                out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
-                out += str(res)
-                out += "\n\n"
-            
-            return out
-        
-        else:
-            raise gr.Error("Please provide a valid audio file.")
-
-    def perform_diarisation(self, source, num_speakers):
-        """
-        Shortcut method for the Diarisation task.
-
-        Returns:
-            str: JSON output of diarisation result.
-        """
-        kwargs = {
-            "num_speakers": num_speakers if num_speakers != 0 else None,
-        }
-        
-        if isinstance(source, str):
-            try:
-                result = self.model.diarization(source, **kwargs)
-            except ValueError:
-                raise gr.Error("Couldn't detect any speech in the provided audio. \
-                        Please try again!")
-        
-            return json.dumps(result, indent=2)
-        elif isinstance(source, list):
-            source_names = [s.split("/")[-1] for s in source]
-            result = []
-            for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
-                try:
-                    res = self.model.diarization(s, **kwargs)
-                except ValueError:
-                    res = f"NO DIARISATION FOUND FOR {s}"
-                    gr.Warning(f"Couldn't detect any speech in {s} will skip this file.")
-                result.append(res)
-            
-            out = {}
-            
-            for i, res in enumerate(result):
-                out[source_names[i]] = res
-                
-            return json.dumps(out, indent=4)
-        
-        else:
-            gr.Error("Please provide a valid audio file.")        
-
-####
-# Gradio Interface
-####
-
-def gradio_Interface(model : Scraibe = None, timeout = 1):
-    """
-    Gradio Web interface for audio transcription.
-
-    :param model: Scraibe model, defaults to None
-    :type model: Scraibe, optional
-    :param timeout: Time until model is unloaded, defaults to 600 seconds
-    :type timeout: int, optional
-    :return: Gradio Interface
-    :rtype: gradio.Interface
-    """
-    
-    if model is None:
-        model = Scraibe()
-    
-    save_model_params = model.params
-
-    pipe = GradioTranscriptionInterface(model)
-        
-    def select_task(choice):
-        # tell the app that it is still in use
-        reset_user_activity()
-        
-        if choice == 'Auto Transcribe':
-            
-            return (gr.update(visible = True),
-                    gr.update(visible = True),
-                    gr.update(visible = True))
-                    
-            
-        elif choice == 'Transcribe':
-            
-            return (gr.update(visible = False),
-                    gr.update(visible = True),
-                    gr.update(visible = True))
-
-            
-        elif choice == 'Diarisation':
-            
-            return (gr.update(visible = True),
-                    gr.update(visible = False),
-                    gr.update(visible = False))
-        
-    def select_origin(choice):
-        
-        # tell the app that it is still in use
-        reset_user_activity()
-        
-        if choice == "Upload Audio":
-            
-            return (gr.update(visible = True),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None))
-        
-        elif choice == "Record Audio":
-            
-            return (gr.update(visible = False, value = None),
-                    gr.update(visible = True),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None))
-
-        elif choice == "Upload Video":
-            
-            return (gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = True),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None))
-        
-        elif choice == "Record Video":
-            
-            return (gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = True),
-                    gr.update(visible = False, value = None))
-            
-        elif choice == "File or Files":
-            
-            return (gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = False, value = None),
-                    gr.update(visible = True))
-
-    def run_scribe(task,
-                   num_speakers,
-                   translate,
-                   language,
-                   audio1,
-                   audio2,
-                   video1,
-                   video2,
-                   file_in,
-                   progress = gr.Progress(track_tqdm= True)):
-        # get *args which are not None
-        
-        if not "model" in locals():
-            gr.Warning("Model unloaded due to inactivity. Reloading the model, please wait.")
-            model = Scraibe(**save_model_params)
-            pipe = GradioTranscriptionInterface(model)
-        # # tell the app that it is still in use
-        reset_user_activity()
-        
-        progress(0, desc='Starting task...')
-        source = audio1 or audio2 or video1 or video2 or file_in
-        
-        if isinstance(source, list):
-            source = [s.name for s in source]
-            if len(source) == 1:
-                source = source[0]
- 
-        if task == 'Auto Transcribe':
-    
-            out_str , out_json = pipe.auto_transcribe(source = source,
-                                num_speakers = num_speakers,
-                                translation = translate,
-                                language = language)
-            
-            if isinstance(source, str):
-                return (gr.update(value = out_str, visible = True),
-                        gr.update(value = out_json, visible = True),
-                        gr.update(visible = True),
-                        gr.update(visible = True))      
-            else:
-                return (gr.update(value = out_str, visible = True),
-                        gr.update(value = out_json, visible = True),
-                        gr.update(visible = False),
-                        gr.update(visible = False))  
-            
-        elif task == 'Transcribe':
-            
-            out = pipe.transcribe(source = source,
-                                translation = translate,
-                                language = language)
-            
-            return (gr.update(value = out, visible = True),
-                    gr.update(value = None, visible = False),
-                    gr.update(visible = False),
-                    gr.update(visible = False))
-            
-        elif task == 'Diarisation':
-            
-            out = pipe.perform_diarisation(source = source,
-                                num_speakers = num_speakers)
-            
-            return (gr.update(value = None, visible = False),
-                    gr.update(value = out, visible = True),
-                    gr.update(visible = False),
-                    gr.update(visible = False))
-        
-    def annotate_output(annoation : str, out_json : dict):
-        # get *args which are not None
-        
-        trans = Transcript.from_json(out_json)
-        trans = trans.annotate(*annoation.split(","))
-
-        return gr.update(value = str(trans)),gr.update(value = trans.get_json())
-
-    # Create a thread to monitor user activity
-    def monitor_activity(model, pipe, timeout=timeout):
-        global USER_ACTIVE
-        
-        while True:
-            time.sleep(timeout)  # Check user activity every second
-            with user_active_lock:
-                
-                if not USER_ACTIVE:
-                    del model
-                    del pipe
-                    print("Model deleted empty memory")
-                    gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.")
-                    break
-                USER_ACTIVE = False 
-    
-    # Start the monitoring thread
-    activity_thread = threading.Thread(target=monitor_activity, args=(model, pipe))
-    activity_thread.daemon = True
-    activity_thread.start()
-        
-    with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:
-
-        # Define components
-        hname = os.path.join(CURRENT_PATH, "header.html")
-        header = open(hname, "r").read()
-        
-        # ugly hack to get the logo to work
-        header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" )
-        
-        gr.HTML(header, visible= True, show_label=False)
-        
-        with gr.Row():
-            
-            with gr.Column():
-            
-                task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
-                                value= 'Auto Transcribe')
-                
-                num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", 
-                                info = "Number of speakers in the audio file. If you don't know,\
-                                    leave it at 0.", visible= True)
-                
-                translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
-                                info="Select 'Yes' to have the output translated into English.",
-                                visible= True)
-                
-                language = gr.Dropdown(LANGUAGES,
-                                label="Language (optional)", value = "None",
-                                info="Language of the audio file. If you don't know,\
-                                    leave it at None.", visible= True)
-                
-                input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" 
-                                    ,"File or Files"], label="Input Type", value="Upload Audio")
-                
-                audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
-                                    interactive= True, visible= True)
-                audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
-                                    interactive= True, visible= False)
-                video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
-                                    interactive= True, visible= False)
-                video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True,
-                                    interactive= True, visible= False)
-                file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
-                
-                submit = gr.Button()
-            
-            with gr.Column():
-                
-                out_txt = gr.Textbox(label="Output",
-                                        visible= True, show_copy_button=True)
-                
-                out_json = gr.JSON(label="JSON Output",
-                                    visible= False, show_copy_button=True)
-                
-                annoation = gr.Textbox(label="Name your speaker's",
-                                    info= "Please provide a list of the speakers arranged \
-                                    in the order in which they appear in the input. Use comma ',' \
-                                    as a seperator. Be aware that the first name is given \
-                                        to SPEAKER_00 the second to SPEAKER_01 and so on.",
-                                    visible= False, interactive= True)
-                
-                annotate = gr.Button(value="Annotate", visible= False, interactive= True)
-            
-        # Define usage of components
-        input.change(fn=select_origin, inputs=[input],
-                        outputs=[audio1, audio2, video1, video2, file_in])
-        
-        task.change(fn=select_task, inputs=[task],
-                    outputs=[num_speakers, translate, language])
-        
-        translate.change(fn= lambda x : gr.update(value = x),
-                            inputs=[translate], outputs=[translate])
-        num_speakers.change(fn= lambda x : gr.update(value = x),
-                            inputs=[num_speakers], outputs=[num_speakers])
-        language.change(fn= lambda x : gr.update(value = x), 
-                        inputs=[language], outputs=[language])
-        
-        submit.click(fn = run_scribe, 
-                        inputs=[task, num_speakers, translate, language, audio1,
-                                audio2, video1, video2, file_in],
-                        outputs=[out_txt, out_json, annoation, annotate])
-        
-        annotate.click(fn = annotate_output, inputs=[annoation, out_json],
-                        outputs=[out_txt, out_json])
-        
-    return demo
-
-    
-if __name__ == "__main__":
-    
-    gradio_Interface().queue().launch()
@@ -0,0 +1,145 @@
+"""
+This file contains every function that is called when the user interacts with the
+UI, for example by pressing a button or uploading a file.
+"""
+
+from math import pi
+import gradio as gr 
+import scraibe.app.global_var as gv
+from scraibe import Transcript
+
+def select_task(choice):
+    """
+    Return the visibility updates for the task-specific option widgets.
+
+    The three updates are positional. In the interface they are wired to the
+    number-of-speakers, translation and language components, in that order.
+
+    Args:
+        choice: Name of the selected task.
+
+    Returns:
+        tuple: Three gr.update objects, or None if the task name is unknown.
+    """
+    visibility_by_task = (
+        ('Auto Transcribe', (True, True, True)),
+        ('Transcribe', (False, True, True)),
+        ('Diarisation', (True, False, False)),
+    )
+
+    for task_name, flags in visibility_by_task:
+        if choice == task_name:
+            return tuple(gr.update(visible = flag) for flag in flags)
+
+    return None
+        
+def select_origin(choice):
+    """
+    Show the input widget that matches the selected input type and hide the others.
+
+    The five updates are positional: upload audio, record audio, upload video,
+    record video and file upload. Every hidden widget also gets its value reset to None.
+
+    Args:
+        choice: Name of the selected input type.
+
+    Returns:
+        tuple: Five gr.update objects, or None if the input type is unknown.
+    """
+    input_types = ("Upload Audio", "Record Audio", "Upload Video",
+                   "Record Video", "File or Files")
+
+    for shown, type_name in enumerate(input_types):
+        if choice == type_name:
+            updates = []
+            for slot in range(len(input_types)):
+                if slot == shown:
+                    updates.append(gr.update(visible = True))
+                else:
+                    updates.append(gr.update(visible = False, value = None))
+            return tuple(updates)
+
+    return None
+        
+def run_scraibe(task,
+                num_speakers,
+                translate,
+                language,
+                audio1,
+                audio2,
+                video1,
+                video2,
+                file_in,
+                progress = gr.Progress(track_tqdm= True)):
+    """
+    Run the selected task on the provided input and return the UI updates.
+
+    Args:
+        task: Name of the task ('Auto Transcribe', 'Transcribe' or 'Diarisation').
+        num_speakers: Number of speakers, forwarded to the tasks that use it.
+        translate: Translation flag, forwarded to the transcription tasks.
+        language: Language of the input, forwarded to the transcription tasks.
+        audio1, audio2, video1, video2, file_in: Values of the input widgets;
+            the first non-empty one is used as source.
+        progress: Gradio progress tracker.
+
+    Returns:
+        tuple: Four gr.update objects. In the interface they are wired to the
+        text output, the JSON output, the annotation textbox and the annotate
+        button, in that order. None is returned for an unknown task name.
+
+    Raises:
+        gr.Error: If no interface object is stored in gv.MODEL.
+    """
+    pipe = gv.MODEL
+
+    # gv.MODEL stays None until an interface object has been stored there. Report
+    # that in the UI instead of failing with an AttributeError on None.
+    if pipe is None:
+        raise gr.Error("No model is loaded. Please load a model and try again.")
+
+    progress(0, desc='Starting task...')
+    # use the first input which is not empty
+    source = audio1 or audio2 or video1 or video2 or file_in
+    
+    if isinstance(source, list):
+        # uploaded files are file objects, the tasks expect their paths
+        source = [s.name for s in source]
+        if len(source) == 1:
+            source = source[0]
+
+    if task == 'Auto Transcribe':
+
+        out_str , out_json = pipe.auto_transcribe(source = source,
+                            num_speakers = num_speakers,
+                            translation = translate,
+                            language = language)
+        
+        # the annotation widgets are only shown for a single input file
+        if isinstance(source, str):
+            return (gr.update(value = out_str, visible = True),
+                    gr.update(value = out_json, visible = True),
+                    gr.update(visible = True),
+                    gr.update(visible = True))      
+        else:
+            return (gr.update(value = out_str, visible = True),
+                    gr.update(value = out_json, visible = True),
+                    gr.update(visible = False),
+                    gr.update(visible = False))  
+        
+    elif task == 'Transcribe':
+        
+        out = pipe.transcribe(source = source,
+                            translation = translate,
+                            language = language)
+        
+        return (gr.update(value = out, visible = True),
+                gr.update(value = None, visible = False),
+                gr.update(visible = False),
+                gr.update(visible = False))
+        
+    elif task == 'Diarisation':
+        
+        out = pipe.perform_diarisation(source = source,
+                            num_speakers = num_speakers)
+        
+        return (gr.update(value = None, visible = False),
+                gr.update(value = out, visible = True),
+                gr.update(visible = False),
+                gr.update(visible = False))
+    
+def annotate_output(annoation : str, out_json : dict):
+    """
+    Replace the speaker labels of a transcript with the names given by the user.
+
+    Args:
+        annoation (str): Comma separated speaker names.
+        out_json (dict): JSON output from which the transcript is rebuilt.
+
+    Returns:
+        tuple: Two gr.update objects carrying the annotated transcript as text
+        and as JSON.
+    """
+    transcript = Transcript.from_json(out_json)
+    speaker_names = annoation.split(",")
+    annotated = transcript.annotate(*speaker_names)
+
+    text_update = gr.update(value = str(annotated))
+    json_update = gr.update(value = annotated.get_json())
+    return text_update, json_update
+
@@ -0,0 +1,129 @@
+"""
+This file contains the actual gradio Interface which is used to interact with the user.
+"""
+
+import gradio as gr
+import os
+
+import scraibe.app.global_var as gv
+from .interactions import *
+from .stg import *
+
+from scraibe import Scraibe
+
+# Color theme passed to gr.Blocks when the interface is built.
+theme = gr.themes.Soft(
+    primary_hue="green",
+    secondary_hue='orange',
+    neutral_hue="gray",  
+)
+
+
+# Languages offered in the "Language (optional)" dropdown.
+# NOTE(review): the dropdown default is the string "None", which is not part of this list.
+LANGUAGES = [
+    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
+    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
+    "Czech", "Danish", "Dutch", "English", "Estonian",
+    "Finnish", "French", "Galician", "German", "Greek",
+    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
+    "Italian", "Japanese", "Kannada", "Kazakh", "Korean",
+    "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
+    "Maori", "Nepali", "Norwegian", "Persian", "Polish",
+    "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
+    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
+    "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
+    "Vietnamese", "Welsh"
+]
+
+# Directory containing this file; used to locate header.html and logo.svg.
+CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
+
+
+def gradio_Interface(pipe : Scraibe = None):
+    """
+    Build the gradio Blocks interface of the app.
+
+    Args:
+        pipe (Scraibe, optional): Model to serve. If given, it is wrapped in a
+            GradioTranscriptionInterface and stored in gv.MODEL. If None,
+            gv.MODEL is left untouched.
+
+    Returns:
+        gr.Blocks: The assembled interface; it is not launched here.
+    """
+    
+    if pipe is not None:
+        gv.MODEL = GradioTranscriptionInterface(pipe)
+    
+    with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:
+            
+            # Define components
+            hname = os.path.join(CURRENT_PATH, "header.html")
+            # read the header inside a context manager so the file handle is closed again
+            with open(hname, "r") as header_file:
+                header = header_file.read()
+            
+            # ugly hack to get the logo to work
+            header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" )
+            
+            gr.HTML(header, visible= True, show_label=False)
+            
+            with gr.Row():
+                
+                with gr.Column():
+                
+                    task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
+                                    value= 'Auto Transcribe')
+                    
+                    num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", 
+                                    info = "Number of speakers in the audio file. If you don't know,\
+                                        leave it at 0.", visible= True)
+                    
+                    translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
+                                    info="Select 'Yes' to have the output translated into English.",
+                                    visible= True)
+                    
+                    language = gr.Dropdown(LANGUAGES,
+                                    label="Language (optional)", value = "None",
+                                    info="Language of the audio file. If you don't know,\
+                                        leave it at None.", visible= True)
+                    
+                    # named input_type so that the builtin input() is not shadowed
+                    input_type = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" 
+                                        ,"File or Files"], label="Input Type", value="Upload Audio")
+                    
+                    audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
+                                        interactive= True, visible= True)
+                    audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
+                                        interactive= True, visible= False)
+                    video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
+                                        interactive= True, visible= False)
+                    video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True,
+                                        interactive= True, visible= False)
+                    file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
+                    
+                    submit = gr.Button()
+                
+                with gr.Column():
+                    
+                    out_txt = gr.Textbox(label="Output",
+                                            visible= True, show_copy_button=True)
+                    
+                    out_json = gr.JSON(label="JSON Output",
+                                        visible= False, show_copy_button=True)
+                    
+                    annoation = gr.Textbox(label="Name your speaker's",
+                                        info= "Please provide a list of the speakers arranged \
+                                        in the order in which they appear in the input. Use comma ',' \
+                                        as a seperator. Be aware that the first name is given \
+                                            to SPEAKER_00 the second to SPEAKER_01 and so on.",
+                                        visible= False, interactive= True)
+                    
+                    annotate = gr.Button(value="Annotate", visible= False, interactive= True)
+                
+            # Define usage of components
+            input_type.change(fn=select_origin, inputs=[input_type],
+                            outputs=[audio1, audio2, video1, video2, file_in])
+            
+            task.change(fn=select_task, inputs=[task],
+                        outputs=[num_speakers, translate, language])
+            
+            translate.change(fn= lambda x : gr.update(value = x),
+                                inputs=[translate], outputs=[translate])
+            num_speakers.change(fn= lambda x : gr.update(value = x),
+                                inputs=[num_speakers], outputs=[num_speakers])
+            language.change(fn= lambda x : gr.update(value = x), 
+                            inputs=[language], outputs=[language])
+            
+            submit.click(fn = run_scraibe, 
+                            inputs=[task, num_speakers, translate, language, audio1,
+                                    audio2, video1, video2, file_in],
+                            outputs=[out_txt, out_json, annoation, annotate])
+            
+            annotate.click(fn = annotate_output, inputs=[annoation, out_json],
+                            outputs=[out_txt, out_json])
+            
+    return demo
@@ -0,0 +1,157 @@
+"""
+stg - scraibe to gradio interface
+
+This file contains the code for the scraibe to gradio interface.
+It adds Gradio interactions to the Scraibe class in the backend.
+
+"""
+
+import json
+import gradio as gr
+from tqdm import tqdm
+from scraibe import Scraibe
+
+
+class GradioTranscriptionInterface:
+    """
+    Interface handling the interaction between Gradio UI and the Audio Transcription system.
+    """
+
+    def __init__(self, model: Scraibe):
+        """
+        Initializes the GradioTranscriptionInterface with a transcription model.
+
+        Args:
+            model (Scraibe): Model responsible for audio transcription tasks.
+        """
+        self.model = model
+
+    def auto_transcribe(self, source,
+                        num_speakers : int,
+                        translation : bool,
+                        language : str):
+        """
+        Shortcut method for the Scraibe task.
+
+        Returns:
+            tuple: Transcribed text (str), JSON output (dict)
+        """
+        
+        kwargs = {
+            "num_speakers": num_speakers if num_speakers != 0 else None,
+            "language": language if language != "None" else None,
+            "task": 'translate' if translation else None
+        }
+        if isinstance(source, str):
+            try:
+                result = self.model.autotranscribe(source, **kwargs)
+            except ValueError:
+                raise gr.Error("Couldn't detect any speech in the provided audio. \
+                        Please try again!")
+            
+            return str(result), result.get_json()
+        
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
+                try:
+                    res = self.model.autotranscribe(s, **kwargs)
+                except ValueError:
+                    _name = s.split("/")[-1]
+                    res = f"NO TRANSCRIPT FOUND FOR {_name}"
+                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
+                result.append(res)
+            
+            out = ''
+            out_dict = {}
+            for i, r in enumerate(result):
+                out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
+                out += str(r)
+                out += "\n\n"
+                
+                if isinstance(r, str):
+                    out_dict[source_names[i]] = r
+                else:
+                    out_dict[source_names[i]] = r.get_dict()
+              
+            return out, json.dumps(out_dict, indent=4)
+        
+        else:
+            raise gr.Error("Please provide a valid audio file.")
+
+
+    def transcribe(self, source, translation, language):
+        """
+        Shortcut method for the Transcribe task.
+
+        Returns:
+            str: Transcribed text.
+        """
+        kwargs = {
+            "language": language if language != "None" else None,
+            "task": 'translate' if translation == "Yes" else None
+        }
+        
+        if isinstance(source, str):
+            result = self.model.transcribe(source, **kwargs)
+            
+            return str(result)
+        
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
+                res = self.model.transcribe(s, **kwargs)
+                result.append(res)
+            
+            out = ''
+            for i, res in enumerate(result):
+                out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
+                out += str(res)
+                out += "\n\n"
+            
+            return out
+        
+        else:
+            raise gr.Error("Please provide a valid audio file.")
+
+    def perform_diarisation(self, source, num_speakers):
+        """
+        Shortcut method for the Diarisation task.
+
+        Returns:
+            str: JSON output of diarisation result.
+        """
+        kwargs = {
+            "num_speakers": num_speakers if num_speakers != 0 else None,
+        }
+        
+        if isinstance(source, str):
+            try:
+                result = self.model.diarization(source, **kwargs)
+            except ValueError:
+                raise gr.Error("Couldn't detect any speech in the provided audio. \
+                        Please try again!")
+        
+            return json.dumps(result, indent=2)
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
+                try:
+                    res = self.model.diarization(s, **kwargs)
+                except ValueError:
+                    res = f"NO DIARISATION FOUND FOR {s}"
+                    gr.Warning(f"Couldn't detect any speech in {s} will skip this file.")
+                result.append(res)
+            
+            out = {}
+            
+            for i, res in enumerate(result):
+                out[source_names[i]] = res
+                
+            return json.dumps(out, indent=4)
+        
+        else:
+            gr.Error("Please provide a valid audio file.")