moved and updated gradio app

2023-08-25 13:38:21 +02:00
parent 9fbe850b88
commit b6f8bc2477
1 changed files with 48 additions and 11 deletions
@@ -1,3 +1,20 @@
+"""
+Gradio Audio Transcription App.
+--------------------------------
+
+This module provides an interface to transcribe audio files using the 
+AutoTranscribe model. Users can either upload an audio file or record their speech 
+live for transcription. The application supports multiple languages and provides 
+options to specify the number of speakers and the language of the audio.
+
+Attributes:
+    LANGUAGES (list): A list of supported languages for transcription.
+
+Usage:
+    Run this script to start the Gradio web interface for audio transcription.
+    
+"""
+
 from autotranscript import AutoTranscribe
 import gradio as gr

@@ -18,14 +35,33 @@ LANGUAGES = [


 def gradio_server(model : AutoTranscribe):
+    """
+    Sets up and launches the Gradio interface for audio transcription.

+    Args:
+        model (AutoTranscribe): An instance of the AutoTranscribe model for transcription.
+    """
    def transcribe(audio, microphone, number_of_speakers, language):
+        """
+        Transcribes the provided audio input based on the given parameters.
+
+        Args:
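+            audio (str | None): Filepath to the uploaded audio file, or None if no file was provided.
+            microphone (str | None): Filepath to the recorded audio, or None if nothing was recorded.
+            number_of_speakers (int): Number of speakers in the audio; 0 means "not specified".
+            language (str): Language of the audio content; the string "None" means "not specified".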
+
+        Returns:
+            tuple: Transcribed text (str), JSON output (from ``out.get_json()``), and Markdown output (from ``out.get_md()``).
+        """
        kwargs = {}
        if number_of_speakers != 0:
            kwargs["num_speakers"] = number_of_speakers
        if language != "None":
            kwargs["language"] = language
            
+        print()
+        
        if audio is not None:
            out = model.transcribe(audio, **kwargs)
        elif microphone is not None:
@@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe):
        else:
            out = "Please upload an audio file or record one."
        
-        
-        return str(out)
+        return str(out), out.get_json(), out.get_md()

    gr.Interface(
        fn=transcribe, 
        inputs=[
-            gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True),
-            gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True),
-            gr.Number(value=0, label= "Number of speakers", 
+            gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File",
+                     interactive=True),
+            gr.Audio(source= "microphone", type="filepath", label="Record Your Audio",
+                     interactive=True, container= False),
+            gr.Number(value=0, label= "Number of speakers (optional)", 
                      info = "Number of speakers in the audio file. If you don't know, leave it at 0."), 
-            # gr.Number(value=0, label= "Minimal number of speakers", 
-            #           info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."),
            gr.Dropdown(LANGUAGES,
-                        label="Languages", default="None",
+                        label="Language (optional)", value = "None",
                        info="Language of the audio file. If you don't know, leave it at None.")
        ],
        outputs=[
-            "text"
+            gr.Textbox(label="Transcription"),
+            gr.JSON(label="Raw Output", container= False),
        ],
        title="Audio Transcription",
-        thumbnail = "Logo_KIDA.png",
        description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
        theme="soft",       # Example of a more modern theme
-    ).launch(share=True)
+        server_port=7860,
+        server_name="autotranscribe",   
+    ).queue().launch() 
    
    
 if __name__ == "__main__":