Moved and updated the Gradio app

2023-08-25 13:38:21 +02:00
parent 9fbe850b88
commit b6f8bc2477
1 changed files with 48 additions and 11 deletions
@@ -1,3 +1,20 @@
 """
 Gradio Audio Transcription App.
 --------------------------------
 This module provides an interface to transcribe audio files using the 
 AutoTranscribe model. Users can either upload an audio file or record their speech 
 live for transcription. The application supports multiple languages and provides 
 options to specify the number of speakers and the language of the audio.
 Attributes:
    LANGUAGES (list): A list of supported languages for transcription.
 Usage:
    Run this script to start the Gradio web interface for audio transcription.
 """
 from autotranscript import AutoTranscribe
 import gradio as gr
@@ -18,13 +35,32 @@ LANGUAGES = [
 def gradio_server(model : AutoTranscribe):
    """
    Sets up and launches the Gradio interface for audio transcription.
    Args:
        model (AutoTranscribe): An instance of the AutoTranscribe model for transcription.
    """
    def transcribe(audio, microphone, number_of_speakers, language):
        """
        Transcribes the provided audio input based on the given parameters.
        Args:
            audio (str): Filepath to the uploaded audio file.
            microphone (str): Filepath to the recorded audio.
            number_of_speakers (int): Number of speakers in the audio.
            language (str): Language of the audio content.
        Returns:
            tuple: Transcribed text (str), JSON output (dict)
        """
        kwargs = {}
        if number_of_speakers != 0:
            kwargs["num_speakers"] = number_of_speakers
        if language != "None":
            kwargs["language"] = language
        print()
        if audio is not None:
            out = model.transcribe(audio, **kwargs)
@@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe):
        else:
            out = "Please upload an audio file or record one."
-        
+        return str(out), out.get_json(), out.get_md()
        return str(out)
    gr.Interface(
        fn=transcribe, 
        inputs=[
-            gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True),
+            gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File",
-            gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True),
+                     interactive=True),
-            gr.Number(value=0, label= "Number of speakers", 
+            gr.Audio(source= "microphone", type="filepath", label="Record Your Audio",
                     interactive=True, container= False),
            gr.Number(value=0, label= "Number of speakers (optional)", 
                      info = "Number of speakers in the audio file. If you don't know, leave it at 0."), 
            # gr.Number(value=0, label= "Minimal number of speakers", 
            #           info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."),
            gr.Dropdown(LANGUAGES,
-                        label="Languages", default="None",
+                        label="Language (optional)", value = "None",
                        info="Language of the audio file. If you don't know, leave it at None.")
        ],
        outputs=[
-            "text"
+            gr.Textbox(label="Transcription"),
            gr.JSON(label="Raw Output", container= False),
        ],
        title="Audio Transcription",
        thumbnail = "Logo_KIDA.png",
        description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
        theme="soft",       # Example of a more modern theme
-    ).launch(share=True)
+        server_port=7860,
        server_name="autotranscribe",   
    ).queue().launch() 
 if __name__ == "__main__":