diff --git a/autotranscript/__init__.py b/autotranscript/__init__.py index 20bcc93..aefa393 100644 --- a/autotranscript/__init__.py +++ b/autotranscript/__init__.py @@ -1,10 +1,13 @@ from .autotranscript import * -from .app.qtfaststart import * from .transcriber import * from .audio import * from .transcript_exporter import * from .diarisation import * + from .version import get_version as _get_version from .misc import * + +from .app.gradio_app import * +from .app.qtfaststart import * __version__ = _get_version() diff --git a/autotranscript/app/__init__.py b/autotranscript/app/__init__.py index c61a882..dc00e7a 100644 --- a/autotranscript/app/__init__.py +++ b/autotranscript/app/__init__.py @@ -1 +1,2 @@ -from .qtfaststart import * \ No newline at end of file +from .qtfaststart import * +from .gradio_app import * \ No newline at end of file diff --git a/gradio_app.py b/autotranscript/app/gradio_app.py similarity index 54% rename from gradio_app.py rename to autotranscript/app/gradio_app.py index 321f8bc..d6aade8 100644 --- a/gradio_app.py +++ b/autotranscript/app/gradio_app.py @@ -1,3 +1,20 @@ +""" +Gradio Audio Transcription App. +-------------------------------- + +This module provides an interface to transcribe audio files using the +AutoTranscribe model. Users can either upload an audio file or record their speech +live for transcription. The application supports multiple languages and provides +options to specify the number of speakers and the language of the audio. + +Attributes: + LANGUAGES (list): A list of supported languages for transcription. + +Usage: + Run this script to start the Gradio web interface for audio transcription. + +""" + from autotranscript import AutoTranscribe import gradio as gr @@ -18,13 +35,32 @@ LANGUAGES = [ def gradio_server(model : AutoTranscribe): + """ + Sets up and launches the Gradio interface for audio transcription. + Args: + model (AutoTranscribe): An instance of the AutoTranscribe model for transcription. + """ def transcribe(audio, microphone, number_of_speakers, language): + """ + Transcribes the provided audio input based on the given parameters. + + Args: + audio (str): Filepath to the uploaded audio file. + microphone (str): Filepath to the recorded audio. + number_of_speakers (int): Number of speakers in the audio. + language (str): Language of the audio content. + + Returns: + tuple: Transcribed text (str), JSON output (dict) + """ kwargs = {} if number_of_speakers != 0: kwargs["num_speakers"] = number_of_speakers if language != "None": kwargs["language"] = language + + print() if audio is not None: out = model.transcribe(audio, **kwargs) @@ -33,30 +69,31 @@ def gradio_server(model : AutoTranscribe): else: out = "Please upload an audio file or record one." - - return str(out) + return str(out), out.get_json(), out.get_md() gr.Interface( fn=transcribe, inputs=[ - gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", interactive=True), - gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", interactive=True), - gr.Number(value=0, label= "Number of speakers", + gr.Audio(source= "upload", type="filepath", label="Upload Your Audio File", + interactive=True), + gr.Audio(source= "microphone", type="filepath", label="Record Your Audio", + interactive=True, container= False), + gr.Number(value=0, label= "Number of speakers (optional)", info = "Number of speakers in the audio file. If you don't know, leave it at 0."), - # gr.Number(value=0, label= "Minimal number of speakers", - # info = "Minimal number of speakers in the audio file. If you don't know or you have specified Numspeakers, leave it at 0."), gr.Dropdown(LANGUAGES, - label="Languages", default="None", + label="Language (optional)", value = "None", info="Language of the audio file. If you don't know, leave it at None.") ], outputs=[ - "text" + gr.Textbox(label="Transcription"), + gr.JSON(label="Raw Output", container= False), ], title="Audio Transcription", - thumbnail = "Logo_KIDA.png", description="Upload an audio file to transcribe its content. Powered by AutoTranscribe!", theme="soft", # Example of a more modern theme - ).launch(share=True) + server_port=7860, + server_name="autotranscribe", + ).queue().launch() if __name__ == "__main__": diff --git a/autotranscript/autotranscript.py b/autotranscript/autotranscript.py index e053d6a..e3abd90 100644 --- a/autotranscript/autotranscript.py +++ b/autotranscript/autotranscript.py @@ -353,7 +353,7 @@ def cli(): pass if start_server: - from .gradio_app import gradio_app + from .app.gradio_app import gradio_app gradio_app(model) if __name__ == "__main__": diff --git a/autotranscript/transcript_exporter.py b/autotranscript/transcript_exporter.py index 9262be6..c6bfa5c 100644 --- a/autotranscript/transcript_exporter.py +++ b/autotranscript/transcript_exporter.py @@ -99,7 +99,7 @@ class Transcript: sseg = time.strftime("%H:%M:%S",time.gmtime(segm[0])) eseg = time.strftime("%H:%M:%S",time.gmtime(segm[1])) - fstring += f"{speaker} ({sseg} ; {eseg}): {seq['text']}\n" + fstring += f"{speaker} ({sseg} ; {eseg}):\t{seq['text']}\n" return fstring def __repr__(self) -> str: @@ -128,7 +128,7 @@ class Transcript: :rtype: str """ if "indent" not in kwargs: - kwargs["indent"] = 4 + kwargs["indent"] = 3 return json.dumps(self.transcript, *args, **kwargs) def get_html(self) -> str: