diff --git a/gradio_app.py b/autotranscript/app/gradio_app.py similarity index 54% rename from gradio_app.py rename to autotranscript/app/gradio_app.py index 321f8bc..d6aade8 100644 --- a/gradio_app.py +++ b/autotranscript/app/gradio_app.py @@ -1,3 +1,20 @@ +""" +Gradio Audio Transcription App. +-------------------------------- + +This module provides an interface to transcribe audio files using the +AutoTranscribe model. Users can either upload an audio file or record their speech +live for transcription. The application supports multiple languages and provides +options to specify the number of speakers and the language of the audio. + +Attributes: + LANGUAGES (list): A list of supported languages for transcription. + +Usage: + Run this script to start the Gradio web interface for audio transcription. + +""" + from autotranscript import AutoTranscribe import gradio as gr @@ -18,13 +35,32 @@ LANGUAGES = [ def gradio_server(model : AutoTranscribe): + """ + Sets up and launches the Gradio interface for audio transcription. + Args: + model (AutoTranscribe): An instance of the AutoTranscribe model for transcription. + """ def transcribe(audio, microphone, number_of_speakers, language): + """ + Transcribes the provided audio input based on the given parameters. + + Args: + audio (str): Filepath to the uploaded audio file. + microphone (str): Filepath to the recorded audio. + number_of_speakers (int): Number of speakers in the audio. + language (str): Language of the audio content. + + Returns: + tuple: Transcribed text (str), JSON output (dict) + """ kwargs = {} if number_of_speakers != 0: kwargs["num_speakers"] = number_of_speakers if language != "None": kwargs["language"] = language + + print() if audio is not None: out = model.transcribe(audio, **kwargs) @@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe): else: out = "Please upload an audio file or record one." - - return str(out) + return str(out), out.get_json(), out.get_md() gr.Interface( fn=transcribe, inputs=[ - gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True), - gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True), - gr.Number(value=0, label= "Number of speakers", + gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", + interactive=True), + gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", + interactive=True, container= False), + gr.Number(value=0, label= "Number of speakers (optional)", info = "Number of speakers in the audio file. If you don't know, leave it at 0."), - # gr.Number(value=0, label= "Minimal number of speakers", - # info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."), gr.Dropdown(LANGUAGES, - label="Languages", default="None", + label="Language (optional)", value = "None", info="Language of the audio file. If you don't know, leave it at None.") ], outputs=[ - "text" + gr.Textbox(label="Transcription"), + gr.JSON(label="Raw Output", container= False), ], title="Audio Transcription", - thumbnail = "Logo_KIDA.png", description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!", theme="soft", # Example of a more modern theme - ).launch(share=True) + server_port=7860, + server_name="autotranscribe", + ).queue().launch() if __name__ == "__main__":