diff --git a/app.py b/app.py index 3645d79..c9beffe 100644 --- a/app.py +++ b/app.py @@ -1,12 +1,12 @@ from dash import Dash, dcc, html, dash_table, Input, Output, State, callback import base64 -from autotranscript.app.qtfaststart import process -from autotranscript import AutoTranscribe +from scraibe.app.qtfaststart import process +from scraibe import AutoTranscribe import io import subprocess as sp import numpy as np -from autotranscript.audio import SAMPLE_RATE +from scraibe.audio import SAMPLE_RATE # Setup auto-transcript autot = AutoTranscribe() # whisper_model="tiny", whisper_kwargs={"local" : False} diff --git a/app2.py b/app2.py new file mode 100644 index 0000000..55cb11b --- /dev/null +++ b/app2.py @@ -0,0 +1,317 @@ +""" +Gradio Audio Transcription App. +-------------------------------- + +This module provides an interface to transcribe audio files using the +AutoTranscribe model. Users can either upload an audio file or record their speech +live for transcription. The application supports multiple languages and provides +options to specify the number of speakers and the language of the audio. + +Attributes: + LANGUAGES (list): A list of supported languages for transcription. + +Usage: + Run this script to start the Gradio web interface for audio transcription. + +""" + + +import json + +import gradio as gr +from scraibe import AutoTranscribe, Transcript + + + +theme = gr.themes.Soft( + primary_hue="green", + secondary_hue='orange', + neutral_hue="gray", + +) +LANGUAGES = [ + "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", + "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", + "Czech", "Danish", "Dutch", "English", "Estonian", + "Finnish", "French", "Galician", "German", "Greek", + "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", + "Italian", "Japanese", "Kannada", "Kazakh", "Korean", + "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", + "Maori", "Nepali", "Norwegian", "Persian", "Polish", + "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", + "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", + "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", + "Vietnamese", "Welsh" +] + +class GradioTranscriptionInterface: + """ + Interface handling the interaction between Gradio UI and the Audio Transcription system. + """ + + def __init__(self, model: AutoTranscribe = AutoTranscribe()): + """ + Initializes the GradioTranscriptionInterface with a transcription model. + + Args: + model (AutoTranscribe): Model responsible for audio transcription tasks. + """ + self.model = model + + def auto_transcribe(self, source, + num_speakers : int, + translation : bool, + language : str): + """ + Shortcut method for the AutoTranscribe task. + + Returns: + tuple: Transcribed text (str), JSON output (dict) + """ + + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + "language": language if language != "None" else None, + "task": 'translate' if translation else None + } + + try: + result = self.model.autotranscribe(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + return str(result), result.get_json() + + + def transcribe(self, source, translation, language): + """ + Shortcut method for the Transcribe task. + + Returns: + str: Transcribed text. + """ + kwargs = { + "language": language if language != "None" else None, + "task": 'translate' if translation == "Yes" else None + } + + result = self.model.transcribe(source, **kwargs) + return str(result) + + def perform_diarisation(self, source, num_speakers): + """ + Shortcut method for the Diarisation task. + + Returns: + str: JSON output of diarisation result. + """ + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + } + + + try: + result = self.model.diarization(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + return json.dumps(result, indent=2) + +#### +# Gradio Interface +#### + +pipe = GradioTranscriptionInterface() + + +def select_task(choice): + if choice == 'Auto Transcribe': + + return (gr.update(visible = True), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Transcribe': + + return (gr.update(visible = False), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Diarisation': + + return (gr.update(visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def select_origin(choice): + if choice == "Upload Audio": + + return (gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Audio": + + return (gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Upload Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None)) + + elif choice == "File": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True)) + +def run_scribe(task, num_speakers, translate, language, audio1, audio2, video1, video2, file_in, progress = gr.Progress(track_tqdm= True)): + # get *args which are not None + progress(0, desc='Starting task...') + source = audio1 or audio2 or video1 or video2 or file_in + + if task == 'Auto Transcribe': + + out_str , out_json = pipe.auto_transcribe(source = source, + num_speakers = num_speakers, + translation = translate, + language = language) + + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = True), + gr.update(visible = True)) + + elif task == 'Transcribe': + + out = pipe.transcribe(source = source, + translation = translate, + language = language) + + return (gr.update(value = out, visible = True), + gr.update(value = None, visible = False), + gr.update(visible = False), + gr.update(visible = False)) + + elif task == 'Diarisation': + + out = pipe.perform_diarisation(source = source, + num_speakers = num_speakers) + + return (gr.update(value = None, visible = False), + gr.update(value = out, visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def annotate_output(annoation : str, out_json : dict): + # get *args which are not None + + trans = Transcript.from_json(out_json) + trans = trans.annotate(*annoation.split(",")) + + return gr.update(value = str(trans)),gr.update(value = trans.get_json()) + + +with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: + + # Define components + header = open("header.html", "r").read() + gr.HTML(header, visible= True, show_label=False) + + with gr.Row(): + + with gr.Column(): + + task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", + value= 'Auto Transcribe') + + num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", + info = "Number of speakers in the audio file. If you don't know,\ + leave it at 0.", visible= True) + + translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, + info="Select 'Yes' to have the output translated into English.", + visible= True) + + language = gr.Dropdown(LANGUAGES, + label="Language (optional)", value = "None", + info="Language of the audio file. If you don't know,\ + leave it at None.", visible= True) + + input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" + ,"File"], label="Input Type", value="Upload Audio") + + audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", + interactive= True, visible= True) + audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", + interactive= True, visible= False) + video1 = gr.Video(source="upload", type="filepath", label="Upload Video", + interactive= True, visible= False) + video2 = gr.Video(source="webcam", label="Record Video", type="filepath", + interactive= True, visible= False) + file_in = gr.File(label="Upload File", interactive= True, visible= False) + + submit = gr.Button() + + with gr.Column(): + + out_txt = gr.Textbox(label="Output", + visible= True, show_copy_button=True) + + out_json = gr.JSON(label="JSON Output", + visible= False, show_copy_button=True) + + annoation = gr.Textbox(label="Name your speaker's", + info= "Please provide a list of the speakers arranged \ + in the order in which they appear in the input. Use comma ',' \ + as a seperator. Be aware that the first name is given \ + to SPEAKER_00 the second to SPEAKER_01 and so on.", + visible= False, interactive= True) + + annotate = gr.Button(value="Annotate", visible= False, interactive= True) + + # Define usage of components + input.change(fn=select_origin, inputs=[input], + outputs=[audio1, audio2, video1, video2, file_in]) + + task.change(fn=select_task, inputs=[task], + outputs=[num_speakers, translate, language]) + + translate.change(fn= lambda x : gr.update(value = x), + inputs=[translate], outputs=[translate]) + num_speakers.change(fn= lambda x : gr.update(value = x), + inputs=[num_speakers], outputs=[num_speakers]) + language.change(fn= lambda x : gr.update(value = x), + inputs=[language], outputs=[language]) + + submit.click(fn = run_scribe, + inputs=[task, num_speakers, translate, language, audio1, + audio2, video1, video2, file_in], + outputs=[out_txt, out_json, annoation, annotate]) + + annotate.click(fn = annotate_output, inputs=[annoation, out_json], + outputs=[out_txt, out_json]) + + +demo.queue().launch() \ No newline at end of file diff --git a/autotranscript/.pyannotetoken b/autotranscript/.pyannotetoken deleted file mode 100644 index e69de29..0000000 diff --git a/scraibe/.pyannotetoken b/scraibe/.pyannotetoken new file mode 100644 index 0000000..42ba269 --- /dev/null +++ b/scraibe/.pyannotetoken @@ -0,0 +1 @@ +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 \ No newline at end of file diff --git a/autotranscript/__init__.py b/scraibe/__init__.py similarity index 100% rename from autotranscript/__init__.py rename to scraibe/__init__.py diff --git a/autotranscript/app/Logo_KIDA_bmel_green.svg b/scraibe/app/Logo_KIDA_bmel_green.svg similarity index 100% rename from autotranscript/app/Logo_KIDA_bmel_green.svg rename to scraibe/app/Logo_KIDA_bmel_green.svg diff --git a/autotranscript/app/__init__.py b/scraibe/app/__init__.py similarity index 100% rename from autotranscript/app/__init__.py rename to scraibe/app/__init__.py diff --git a/autotranscript/app/gradio_app.py b/scraibe/app/gradio_app.py similarity index 99% rename from autotranscript/app/gradio_app.py rename to scraibe/app/gradio_app.py index 13a6ee1..6f09506 100644 --- a/autotranscript/app/gradio_app.py +++ b/scraibe/app/gradio_app.py @@ -35,7 +35,7 @@ Usage: import json import gradio as gr -from autotranscript import AutoTranscribe, Transcript +from scraibe import AutoTranscribe, Transcript theme = gr.themes.Soft( diff --git a/autotranscript/app/header.html b/scraibe/app/header.html similarity index 100% rename from autotranscript/app/header.html rename to scraibe/app/header.html diff --git a/autotranscript/app/qtfaststart.py b/scraibe/app/qtfaststart.py similarity index 100% rename from autotranscript/app/qtfaststart.py rename to scraibe/app/qtfaststart.py diff --git a/autotranscript/audio.py b/scraibe/audio.py similarity index 100% rename from autotranscript/audio.py rename to scraibe/audio.py diff --git a/autotranscript/autotranscript.py b/scraibe/autotranscript.py similarity index 99% rename from autotranscript/autotranscript.py rename to scraibe/autotranscript.py index d27dba8..f588e42 100644 --- a/autotranscript/autotranscript.py +++ b/scraibe/autotranscript.py @@ -126,7 +126,6 @@ class AutoTranscribe: diarisation = self.diariser.diarization(dia_audio, **kwargs) - if not diarisation["segments"]: print("No segments found. Try to run transcription without diarisation.") @@ -145,8 +144,6 @@ class AutoTranscribe: # Transcribe each segment and store the results final_transcript = dict() - - for i in trange(len(diarisation["segments"]), desc= "Transcribing"): seg = diarisation["segments"][i] diff --git a/autotranscript/cli.py b/scraibe/cli.py similarity index 100% rename from autotranscript/cli.py rename to scraibe/cli.py diff --git a/autotranscript/diarisation.py b/scraibe/diarisation.py similarity index 100% rename from autotranscript/diarisation.py rename to scraibe/diarisation.py diff --git a/autotranscript/misc.py b/scraibe/misc.py similarity index 100% rename from autotranscript/misc.py rename to scraibe/misc.py diff --git a/autotranscript/transcriber.py b/scraibe/transcriber.py similarity index 100% rename from autotranscript/transcriber.py rename to scraibe/transcriber.py diff --git a/autotranscript/transcript_exporter.py b/scraibe/transcript_exporter.py similarity index 100% rename from autotranscript/transcript_exporter.py rename to scraibe/transcript_exporter.py diff --git a/autotranscript/version.py b/scraibe/version.py similarity index 95% rename from autotranscript/version.py rename to scraibe/version.py index 0a3730e..b3cf626 100644 --- a/autotranscript/version.py +++ b/scraibe/version.py @@ -1,69 +1,69 @@ -import os -import subprocess as sp - -MAJOR = 0 -MINOR = 1 -MICRO = 0 -MICRO_POST = 0 -ISRELEASED = False -VERSION = '%d.%d.%d.%d' % (MAJOR, MINOR, MICRO, MICRO_POST) - -# Return the git revision as a string -# taken from numpy/numpy -def git_version(): - def _minimal_ext_cmd(cmd): - # construct minimal environment - env = {} - for k in ['SYSTEMROOT', 'PATH', 'HOME']: - v = os.environ.get(k) - if v is not None: - env[k] = v - - # LANGUAGE is used on win32 - env['LANGUAGE'] = 'C' - env['LANG'] = 'C' - env['LC_ALL'] = 'C' - - out = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, env=env).communicate()[0] - return out - - try: - out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) - GIT_REVISION = out.strip().decode('ascii') - except OSError: - GIT_REVISION = "Unknown" - - return GIT_REVISION - -def _get_git_version(): - cwd = os.getcwd() - - # go to the main directory - fdir = os.path.dirname(os.path.abspath(__file__)) - maindir = os.path.abspath(os.path.join(fdir, "..")) - # maindir = fdir # os.path.join(fdir, "..") - os.chdir(maindir) - - # get git version - res = git_version() - - # restore the cwd - os.chdir(cwd) - return res - -def get_version(build_version=False): - if ISRELEASED: - return VERSION - - # unreleased version - GIT_REVISION = _get_git_version() - - if build_version: - import datetime as dt - date = dt.date.strftime(dt.datetime.now(), "%Y%m%d%H%M%S") - return VERSION + ".dev" + date - else: - return VERSION + ".dev0+" + GIT_REVISION[:7] - - - +import os +import subprocess as sp + +MAJOR = 0 +MINOR = 1 +MICRO = 0 +MICRO_POST = 0 +ISRELEASED = False +VERSION = '%d.%d.%d.%d' % (MAJOR, MINOR, MICRO, MICRO_POST) + +# Return the git revision as a string +# taken from numpy/numpy +def git_version(): + def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + + out = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, env=env).communicate()[0] + return out + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + GIT_REVISION = out.strip().decode('ascii') + except OSError: + GIT_REVISION = "Unknown" + + return GIT_REVISION + +def _get_git_version(): + cwd = os.getcwd() + + # go to the main directory + fdir = os.path.dirname(os.path.abspath(__file__)) + maindir = os.path.abspath(os.path.join(fdir, "..")) + # maindir = fdir # os.path.join(fdir, "..") + os.chdir(maindir) + + # get git version + res = git_version() + + # restore the cwd + os.chdir(cwd) + return res + +def get_version(build_version=False): + if ISRELEASED: + return VERSION + + # unreleased version + GIT_REVISION = _get_git_version() + + if build_version: + import datetime as dt + date = dt.date.strftime(dt.datetime.now(), "%Y%m%d%H%M%S") + return VERSION + ".dev" + date + else: + return VERSION + ".dev0+" + GIT_REVISION[:7] + + + diff --git a/setup.py b/setup.py index 05a7f77..6efaf3b 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import pkg_resources import os from setuptools import setup, find_packages -module_name = "autotranscript" +module_name = "scraibe" github_url = "https://github.com/JSchmie/autotranscript" file_dir = os.path.dirname(os.path.realpath(__file__)) @@ -18,7 +18,7 @@ with open(verfile, "r") as fp: ############### setup ############### -build_version = "AUTOTRANSCRIPT_BUILD" in os.environ +build_version = "SCRAIBE_BUILD" in os.environ if __name__ == "__main__": @@ -42,5 +42,5 @@ if __name__ == "__main__": description='Transcription tool for audio files based on Whisper and Pyannote', package_data={ "header" : ["app/header.html"], "logo" : ["app/Logo_KIDA_bmel_green.svg"]}, entry_points={'console_scripts': - ['autotranscript = autotranscript.cli:cli']} + ['scraibe = scraibe.cli:cli']} ) diff --git a/test_autotranscript.py b/test_autotranscript.py index 8f745a0..475f4de 100644 --- a/test_autotranscript.py +++ b/test_autotranscript.py @@ -1,5 +1,5 @@ import pytest -from autotranscript import Transcriber +from scraibe import Transcriber from unittest.mock import patch, mock_open import os @@ -55,7 +55,7 @@ def test_save_transcript_to_file(transcriber): # Test Diaraization class -from autotranscript import Diariser +from scraibe import Diariser @pytest.fixture def diarisation(): @@ -83,7 +83,7 @@ def test_diarisation(diarisation): # Test AudioProcessor -from autotranscript import AudioProcessor , TorchAudioProcessor +from scraibe import AudioProcessor , TorchAudioProcessor def test_AudioProcessor_init(): diff --git a/transcribe.py b/transcribe.py index 73d8838..094dcfe 100644 --- a/transcribe.py +++ b/transcribe.py @@ -18,16 +18,14 @@ # os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models") -from autotranscript import AutoTranscribe - +from scraibe import AutoTranscribe model = AutoTranscribe() -text = model.transcribe("test.mp4") +text = model.autotranscribe('kida.mp4', num_speakers=2) print("Transcription:\n") print(text) - # from autotranscript.misc import * # import os