Merge branch 'develop' into pipy-package

2023-08-30 10:42:31 +02:00
parent 064a169b52 74eb8b8a86
commit 01c3638e0a
5 changed files with 58 additions and 17 deletions
@@ -1,11 +1,15 @@
 from .autotranscript import *
-from .app.qtfaststart import *
 from .transcriber import *
 from .audio import *
 from .transcript_exporter import *
 from .diarisation import *
+
 from .version import get_version as _get_version
 from .misc import *
+
+from .app.gradio_app import *
+from .app.qtfaststart import *
+
 from .cli import *
 
 __version__ = _get_version()
@@ -1 +1,2 @@
-from .qtfaststart import *
+from .qtfaststart import *
+from .gradio_app import *
@@ -0,0 +1,102 @@
+"""
+Gradio Audio Transcription App.
+--------------------------------
+
+This module provides an interface to transcribe audio files using the 
+AutoTranscribe model. Users can either upload an audio file or record their speech 
+live for transcription. The application supports multiple languages and provides 
+options to specify the number of speakers and the language of the audio.
+
+Attributes:
+    LANGUAGES (list): A list of supported languages for transcription.
+
+Usage:
+    Run this script to start the Gradio web interface for audio transcription.
+    
+"""
+
+from autotranscript import AutoTranscribe
+import gradio as gr
+
+# Language names offered in the "Language (optional)" dropdown of the UI.
+# The selected entry is forwarded unchanged as the ``language`` keyword
+# argument of ``model.transcribe``.
+LANGUAGES = [
+    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
+    "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
+    "Czech", "Danish", "Dutch", "English", "Estonian",
+    "Finnish", "French", "Galician", "German", "Greek",
+    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
+    "Italian", "Japanese", "Kannada", "Kazakh", "Korean",
+    "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
+    "Maori", "Nepali", "Norwegian", "Persian", "Polish",
+    "Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
+    "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
+    "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
+    "Vietnamese", "Welsh"
+]
+
+
+def gradio_server(model : AutoTranscribe, server_name=None, server_port=7860):
+    """
+    Sets up and launches the Gradio interface for audio transcription.
+
+    Args:
+        model (AutoTranscribe): An instance of the AutoTranscribe model for transcription.
+        server_name (str, optional): Address handed to ``launch()``. ``None``
+            (the default) leaves the choice to Gradio.
+        server_port (int, optional): Port handed to ``launch()``. Defaults to 7860.
+    """
+    def transcribe(audio, microphone, number_of_speakers, language):
+        """
+        Transcribes the provided audio input based on the given parameters.
+
+        The uploaded file takes precedence over the microphone recording.
+        If neither is present, a hint is returned instead of a transcript.
+
+        Args:
+            audio (str): Filepath to the uploaded audio file.
+            microphone (str): Filepath to the recorded audio.
+            number_of_speakers (int): Number of speakers in the audio
+                (0 or empty means "unknown").
+            language (str): Language of the audio content ("None" means "unknown").
+
+        Returns:
+            tuple: Transcribed text (str), JSON output
+        """
+        source = audio if audio is not None else microphone
+        if source is None:
+            # Nothing to transcribe: show the hint in the textbox and keep the
+            # JSON panel empty instead of calling transcript methods on a str.
+            return "Please upload an audio file or record one.", {}
+
+        kwargs = {}
+        # The number field may arrive as a float or as None when cleared;
+        # both 0 and None mean "let the model decide".
+        if number_of_speakers:
+            kwargs["num_speakers"] = int(number_of_speakers)
+        if language and language != "None":
+            kwargs["language"] = language
+
+        out = model.transcribe(source, **kwargs)
+
+        # Exactly one return value per declared output component.
+        return str(out), out.get_json()
+
+    gr.Interface(
+        fn=transcribe,
+        inputs=[
+            gr.Audio(source="upload", type="filepath", label="Upload Your Audio File",
+                     interactive=True),
+            gr.Audio(source="microphone", type="filepath", label="Record Your Audio",
+                     interactive=True, container=False),
+            gr.Number(value=0, label="Number of speakers (optional)",
+                      info="Number of speakers in the audio file. If you don't know, leave it at 0."),
+            # "None" must be a real choice so it can be the default and be
+            # re-selected after a language has been picked.
+            gr.Dropdown(["None"] + LANGUAGES,
+                        label="Language (optional)", value="None",
+                        info="Language of the audio file. If you don't know, leave it at None.")
+        ],
+        outputs=[
+            gr.Textbox(label="Transcription"),
+            gr.JSON(label="Raw Output", container=False),
+        ],
+        title="Audio Transcription",
+        description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!",
+        theme="soft",       # Example of a more modern theme
+    # Server options belong to launch(), not to the Interface constructor.
+    ).queue().launch(server_name=server_name, server_port=server_port)
+    
+    
+if __name__ == "__main__":
+    # Script entry point: build a default model and serve the web UI with it.
+    gradio_server(AutoTranscribe())
@@ -276,5 +276,4 @@ class AutoTranscribe:
        if not isinstance(audio_file, AudioProcessor):
            raise ValueError(f'Audiofile must be of type AudioProcessor,' \
                             f'not {type(audio_file)}')     
-        return audio_file
-    
+        return audio_file
@@ -99,7 +99,7 @@ class Transcript:
            sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0]))
            eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1]))
            
-            fstring += f"{speaker} ({sseg} ; {eseg}): {seq['text']}\n"
+            fstring += f"{speaker} ({sseg} ; {eseg}):\t{seq['text']}\n"
        return fstring
    
    def __repr__(self) -> str:
@@ -128,7 +128,7 @@ class Transcript:
        :rtype: str
        """
        if "indent" not in kwargs:
-            kwargs["indent"] = 4
+            kwargs["indent"] = 3
        return json.dumps(self.transcript, *args, **kwargs)
    
    def get_html(self) -> str: