From f2877d7ad4ac99216d80148c6902b1bf35543bb5 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 8 Nov 2023 17:07:30 +0100 Subject: [PATCH 01/42] try to implement sleep --- scraibe/app/gradio_app.py | 57 ++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py index cf80b7e..086db17 100644 --- a/scraibe/app/gradio_app.py +++ b/scraibe/app/gradio_app.py @@ -34,10 +34,12 @@ Usage: import json import os +import re import gradio as gr +import threading from tqdm import tqdm - +import time from scraibe import Scraibe, Transcript theme = gr.themes.Soft( @@ -63,6 +65,19 @@ LANGUAGES = [ CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) + +# Global variable to track user activity +USER_ACTIVE = True + +# Lock to synchronize access to user_active variable +user_active_lock = threading.Lock() + +# Function to reset the user activity flag +def reset_user_activity(): + global USER_ACTIVE + with user_active_lock: + USER_ACTIVE = True + class GradioTranscriptionInterface: """ Interface handling the interaction between Gradio UI and the Audio Transcription system. 
@@ -205,8 +220,7 @@ class GradioTranscriptionInterface: return json.dumps(out, indent=4) else: - gr.Error("Please provide a valid audio file.") - + gr.Error("Please provide a valid audio file.") #### # Gradio Interface @@ -218,8 +232,11 @@ def gradio_Interface(model : Scraibe = None): model = Scraibe() pipe = GradioTranscriptionInterface(model) - + def select_task(choice): + # tell the app that it is still in use + reset_user_activity() + if choice == 'Auto Transcribe': return (gr.update(visible = True), @@ -241,6 +258,10 @@ def gradio_Interface(model : Scraibe = None): gr.update(visible = False)) def select_origin(choice): + + # tell the app that it is still in use + reset_user_activity() + if choice == "Upload Audio": return (gr.update(visible = True), @@ -292,6 +313,10 @@ def gradio_Interface(model : Scraibe = None): file_in, progress = gr.Progress(track_tqdm= True)): # get *args which are not None + + # # tell the app that it is still in use + reset_user_activity() + progress(0, desc='Starting task...') source = audio1 or audio2 or video1 or video2 or file_in @@ -346,10 +371,28 @@ def gradio_Interface(model : Scraibe = None): trans = trans.annotate(*annoation.split(",")) return gr.update(value = str(trans)),gr.update(value = trans.get_json()) + + # Create a thread to monitor user activity + def monitor_activity(): + global USER_ACTIVE + while True: + time.sleep(60) # Check user activity every second + with user_active_lock: + + if not USER_ACTIVE: + del model + print("Model deleted empty memory") + break + USER_ACTIVE = False + + # Start the monitoring thread + activity_thread = threading.Thread(target=monitor_activity) + activity_thread.daemon = True + activity_thread.start() with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: - + # Define components hname = os.path.join(CURRENT_PATH, "header.html") header = open(hname, "r").read() @@ -358,7 +401,7 @@ def gradio_Interface(model : Scraibe = None): header = 
header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) gr.HTML(header, visible= True, show_label=False) - + with gr.Row(): with gr.Column(): @@ -433,6 +476,8 @@ def gradio_Interface(model : Scraibe = None): annotate.click(fn = annotate_output, inputs=[annoation, out_json], outputs=[out_txt, out_json]) + + return demo From ea03bf1f06c7448fd7d51eb70b819cc725ee0d20 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 10 Nov 2023 15:23:50 +0100 Subject: [PATCH 02/42] added variable to store params for possible reload --- scraibe/autotranscript.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scraibe/autotranscript.py b/scraibe/autotranscript.py index b3545e4..2664e3f 100644 --- a/scraibe/autotranscript.py +++ b/scraibe/autotranscript.py @@ -75,6 +75,11 @@ class Scraibe: Path to pyannote diarization model or model itself. **kwargs: Additional keyword arguments for whisper and pyannote diarization models. + e.g.: + + - verbose: If True, the class will print additional information. + - save_setup: If True, the setup arguments are stored in self.params, + so you can unload the class and reload it again later. """ @@ -98,6 +103,15 @@ class Scraibe: else: self.verbose = False + # Save kwargs for autotranscribe if you want to unload the class and load it again. 
+ if kwargs.get('save_setup'): + self.params = dict(whisper_model = whisper_model, + dia_model = dia_model, + **kwargs) + else: + self.params = {} + + def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray], remove_original : bool = False, **kwargs) -> Transcript: From b42d1d1faaa58d400e8f908eb8e6ff8f700017fc Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 10 Nov 2023 15:43:01 +0100 Subject: [PATCH 03/42] tryed to unload model but it does not work jet --- scraibe/app/gradio_app.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py index 086db17..6913643 100644 --- a/scraibe/app/gradio_app.py +++ b/scraibe/app/gradio_app.py @@ -32,13 +32,15 @@ Usage: """ + import json +from math import pi import os -import re import gradio as gr import threading from tqdm import tqdm + import time from scraibe import Scraibe, Transcript @@ -226,11 +228,23 @@ class GradioTranscriptionInterface: # Gradio Interface #### -def gradio_Interface(model : Scraibe = None): +def gradio_Interface(model : Scraibe = None, timeout = 1): + """ + Gradio Web interface for audio transcription. + + :param model: Scraibe model, defaults to None + :type model: Scraibe, optional + :param timeout: Time until model is unloaded, defaults to 600 seconds + :type timeout: int, optional + :return: Gradio Interface + :rtype: gradio.Interface + """ if model is None: model = Scraibe() - + + save_model_params = model.params + pipe = GradioTranscriptionInterface(model) def select_task(choice): @@ -314,6 +328,10 @@ def gradio_Interface(model : Scraibe = None): progress = gr.Progress(track_tqdm= True)): # get *args which are not None + if not "model" in locals(): + gr.Warning("Model unloaded due to inactivity. 
Reloading the model, please wait.") + model = Scraibe(**save_model_params) + pipe = GradioTranscriptionInterface(model) # # tell the app that it is still in use reset_user_activity() @@ -373,21 +391,23 @@ def gradio_Interface(model : Scraibe = None): return gr.update(value = str(trans)),gr.update(value = trans.get_json()) # Create a thread to monitor user activity - def monitor_activity(): + def monitor_activity(model, pipe, timeout=timeout): global USER_ACTIVE while True: - time.sleep(60) # Check user activity every second + time.sleep(timeout) # Check user activity every second with user_active_lock: if not USER_ACTIVE: del model + del pipe print("Model deleted empty memory") + gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.") break - USER_ACTIVE = False + USER_ACTIVE = False # Start the monitoring thread - activity_thread = threading.Thread(target=monitor_activity) + activity_thread = threading.Thread(target=monitor_activity, args=(model, pipe)) activity_thread.daemon = True activity_thread.start() @@ -401,7 +421,7 @@ def gradio_Interface(model : Scraibe = None): header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) gr.HTML(header, visible= True, show_label=False) - + with gr.Row(): with gr.Column(): @@ -476,8 +496,6 @@ def gradio_Interface(model : Scraibe = None): annotate.click(fn = annotate_output, inputs=[annoation, out_json], outputs=[out_txt, out_json]) - - return demo From fafe5c2709ebd8c550a2dab63fc3b9b7ee7212bb Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 15 Nov 2023 16:35:26 +0100 Subject: [PATCH 04/42] test for multithreading --- test_multithreading.py | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 test_multithreading.py diff --git a/test_multithreading.py b/test_multithreading.py new file mode 100644 index 0000000..c3ab051 --- /dev/null +++ b/test_multithreading.py @@ -0,0 +1,73 @@ +import time + +from scraibe import Scraibe +import 
threading +import torch +import gc + +model = None +last_used = time.time() +transcribe_active = threading.Event() + +def transcribe_thread(audio): + + global model + transcribe_active.set() + print(model.autotranscribe(audio)) + transcribe_active.clear() + +def model_thread(): + global model, last_used + model = Scraibe(dia_model= "models/pyannote/config.yaml") + last_used = time.time() + +def interaction_thread(): + global model + while True: + command = input("Enter a command ('q' to quit, 'reload' to reload model): ") + print("Command entered:", command, command.lower() == 'reload') + if command.lower() == 'q': + break + elif command.lower() == 'reload': + print("Reloading model...", model) + if model is None: + model_runner = threading.Thread(target=model_thread) + model_runner.start() + model_runner.join() + else: + print("Model is already loaded.") + else: + transcribe = threading.Thread(target=transcribe_thread, args=(command,)) + transcribe.start() + transcribe.join() + +def delete_unused_model(model_runner): + global model, last_used, transcribe_active + while True: + if not transcribe_active.is_set() and (time.time() - last_used > 30) and model is not None: + + del model + model = None + + gc.collect() + torch.cuda.empty_cache() + + model_runner.join() + print("Model deleted", threading.active_count()) + time.sleep(1) + +if __name__ == "__main__": + + lock = threading.Lock() + + interaction = threading.Thread(target=interaction_thread) + model_runner = threading.Thread(target=model_thread) + model_deleter = threading.Thread(target=delete_unused_model, args=(model_runner,)) + + model_runner.start() + model_deleter.start() + + # Ensure the model is initialized before starting the interaction + model_runner.join() + interaction.start() + interaction.join() \ No newline at end of file From 105161b6a601e3464ac0371a3090ff102831016f Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 17 Nov 2023 14:12:53 +0100 Subject: [PATCH 05/42] test successed next step to 
implement in gradio app --- test_multithreading.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/test_multithreading.py b/test_multithreading.py index c3ab051..6f9834d 100644 --- a/test_multithreading.py +++ b/test_multithreading.py @@ -1,3 +1,4 @@ +import os import time from scraibe import Scraibe @@ -22,29 +23,38 @@ def model_thread(): last_used = time.time() def interaction_thread(): - global model + global model, model_runner while True: command = input("Enter a command ('q' to quit, 'reload' to reload model): ") - print("Command entered:", command, command.lower() == 'reload') + if command.lower() == 'q': break elif command.lower() == 'reload': print("Reloading model...", model) if model is None: + transcribe_active.clear() #black magic model_runner = threading.Thread(target=model_thread) model_runner.start() model_runner.join() + else: print("Model is already loaded.") else: - transcribe = threading.Thread(target=transcribe_thread, args=(command,)) - transcribe.start() - transcribe.join() + if os.path.exists(command): + transcribe = threading.Thread(target=transcribe_thread, args=(command,)) + transcribe.start() + transcribe.join() + + else: + print("File does not exist.") def delete_unused_model(model_runner): global model, last_used, transcribe_active + while True: - if not transcribe_active.is_set() and (time.time() - last_used > 30) and model is not None: + print("Checking for unused model...", transcribe_active.is_set()) + _unload_porperty = (not transcribe_active.is_set() and (time.time() - last_used > 30) and model is not None) + if _unload_porperty: del model model = None @@ -53,8 +63,9 @@ def delete_unused_model(model_runner): torch.cuda.empty_cache() model_runner.join() + print("Model deleted", threading.active_count()) - time.sleep(1) + time.sleep(10) if __name__ == "__main__": From f691790c00021abffdf741910c9247300e5dec49 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 17 Nov 2023 15:23:11 +0100 
Subject: [PATCH 06/42] aded deamon process --- test_multithreading.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test_multithreading.py b/test_multithreading.py index 6f9834d..fb4e301 100644 --- a/test_multithreading.py +++ b/test_multithreading.py @@ -2,6 +2,8 @@ import os import time from scraibe import Scraibe + +import multiprocessing import threading import torch import gc @@ -52,7 +54,6 @@ def delete_unused_model(model_runner): global model, last_used, transcribe_active while True: - print("Checking for unused model...", transcribe_active.is_set()) _unload_porperty = (not transcribe_active.is_set() and (time.time() - last_used > 30) and model is not None) if _unload_porperty: @@ -64,7 +65,7 @@ def delete_unused_model(model_runner): model_runner.join() - print("Model deleted", threading.active_count()) + print("Model deleted") time.sleep(10) if __name__ == "__main__": @@ -72,8 +73,8 @@ if __name__ == "__main__": lock = threading.Lock() interaction = threading.Thread(target=interaction_thread) - model_runner = threading.Thread(target=model_thread) - model_deleter = threading.Thread(target=delete_unused_model, args=(model_runner,)) + model_runner = threading.Thread(target=model_thread, daemon=True) + model_deleter = threading.Thread(target=delete_unused_model, args=(model_runner,), daemon=True) model_runner.start() model_deleter.start() From bbb2c848e31e04cee3635956e1e84a897de38519 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 20 Nov 2023 15:01:51 +0100 Subject: [PATCH 07/42] rework structure of gradio app --- scraibe/__init__.py | 3 +- scraibe/app/__init__.py | 7 +- scraibe/app/activity_tracker.py | 37 +++ scraibe/app/global_var.py | 9 + scraibe/app/gradio_app.py | 504 -------------------------------- scraibe/app/interactions.py | 145 +++++++++ scraibe/app/interface.py | 129 ++++++++ scraibe/app/stg.py | 157 ++++++++++ 8 files changed, 484 insertions(+), 507 deletions(-) create mode 100644 scraibe/app/activity_tracker.py 
create mode 100644 scraibe/app/global_var.py delete mode 100644 scraibe/app/gradio_app.py create mode 100644 scraibe/app/interactions.py create mode 100644 scraibe/app/interface.py create mode 100644 scraibe/app/stg.py diff --git a/scraibe/__init__.py b/scraibe/__init__.py index a3a2b17..3fd77e8 100644 --- a/scraibe/__init__.py +++ b/scraibe/__init__.py @@ -7,8 +7,7 @@ from .diarisation import * from .version import get_version as _get_version from .misc import * -from .app.gradio_app import * -from .app.qtfaststart import * +from .app import * from .cli import * diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py index dc00e7a..9e04a48 100644 --- a/scraibe/app/__init__.py +++ b/scraibe/app/__init__.py @@ -1,2 +1,7 @@ from .qtfaststart import * -from .gradio_app import * \ No newline at end of file +from .activity_tracker import * +from .interface import * +from .stg import * +from .interactions import * +from .global_var import * +from .app import * \ No newline at end of file diff --git a/scraibe/app/activity_tracker.py b/scraibe/app/activity_tracker.py new file mode 100644 index 0000000..5cced3b --- /dev/null +++ b/scraibe/app/activity_tracker.py @@ -0,0 +1,37 @@ +""" +This file contains the functions related to monitoring the actual app usage. +The goal is to make the app more efficient in its use of resources, +for example by unloading or reloading the model. +""" +import time +import threading +import torch +import gc +import gradio as gr + + +timeout = 30 #seconds +USER_ACTIVE = True +user_active_lock = threading.Lock() # dummy for now + +# Loop that monitors user activity; intended to run in a separate thread +def monitor_activity(model, pipe, timeout=timeout): + global USER_ACTIVE + + while True: + time.sleep(timeout) # Check user activity every `timeout` seconds + with user_active_lock: + + if not USER_ACTIVE: + del model + del pipe + + gc.collect() + torch.cuda.empty_cache() + + + + print("Model deleted empty memory") + gr.Warning("Model unloaded due to inactivity. 
Please reload the model to continue.") + break + USER_ACTIVE = False \ No newline at end of file diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py new file mode 100644 index 0000000..191e3e6 --- /dev/null +++ b/scraibe/app/global_var.py @@ -0,0 +1,9 @@ +""" +Stores global variables for the app. +""" + +# Global variable to store the model +MODEL = None + +# Global variable to track user activity +USER_ACTIVE = False \ No newline at end of file diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py deleted file mode 100644 index 6913643..0000000 --- a/scraibe/app/gradio_app.py +++ /dev/null @@ -1,504 +0,0 @@ -""" -Gradio Audio Transcription App. --------------------------------- - -This module provides an interface to transcribe audio files using the -Scraibe model. Users can either upload an audio file or record their speech -live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. - -Attributes: - LANGUAGES (list): A list of supported languages for transcription. - -Usage: - Run this script to start the Gradio web interface for audio transcription. - -""" - -""" -Gradio Audio Transcription App. --------------------------------- - -This module provides an interface to transcribe audio files using the -Scraibe model. Users can either upload an audio file or record their speech -live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. - -Attributes: - LANGUAGES (list): A list of supported languages for transcription. - -Usage: - Run this script to start the Gradio web interface for audio transcription. 
- -""" - - -import json -from math import pi -import os - -import gradio as gr -import threading -from tqdm import tqdm - -import time -from scraibe import Scraibe, Transcript - -theme = gr.themes.Soft( - primary_hue="green", - secondary_hue='orange', - neutral_hue="gray", -) - -LANGUAGES = [ - "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", - "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", - "Czech", "Danish", "Dutch", "English", "Estonian", - "Finnish", "French", "Galician", "German", "Greek", - "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", - "Italian", "Japanese", "Kannada", "Kazakh", "Korean", - "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", - "Maori", "Nepali", "Norwegian", "Persian", "Polish", - "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", - "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", - "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", - "Vietnamese", "Welsh" -] - -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) - - -# Global variable to track user activity -USER_ACTIVE = True - -# Lock to synchronize access to user_active variable -user_active_lock = threading.Lock() - -# Function to reset the user activity flag -def reset_user_activity(): - global USER_ACTIVE - with user_active_lock: - USER_ACTIVE = True - -class GradioTranscriptionInterface: - """ - Interface handling the interaction between Gradio UI and the Audio Transcription system. - """ - - def __init__(self, model: Scraibe): - """ - Initializes the GradioTranscriptionInterface with a transcription model. - - Args: - model (Scraibe): Model responsible for audio transcription tasks. - """ - self.model = model - - def auto_transcribe(self, source, - num_speakers : int, - translation : bool, - language : str): - """ - Shortcut method for the Scraibe task. 
- - Returns: - tuple: Transcribed text (str), JSON output (dict) - """ - - kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - "language": language if language != "None" else None, - "task": 'translate' if translation else None - } - if isinstance(source, str): - try: - result = self.model.autotranscribe(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - - return str(result), result.get_json() - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - try: - res = self.model.autotranscribe(s, **kwargs) - except ValueError: - _name = s.split("/")[-1] - res = f"NO TRANSCRIPT FOUND FOR {_name}" - gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") - result.append(res) - - out = '' - out_dict = {} - for i, r in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(r) - out += "\n\n" - - if isinstance(r, str): - out_dict[source_names[i]] = r - else: - out_dict[source_names[i]] = r.get_dict() - - return out, json.dumps(out_dict, indent=4) - - else: - raise gr.Error("Please provide a valid audio file.") - - - def transcribe(self, source, translation, language): - """ - Shortcut method for the Transcribe task. - - Returns: - str: Transcribed text. 
- """ - kwargs = { - "language": language if language != "None" else None, - "task": 'translate' if translation == "Yes" else None - } - - if isinstance(source, str): - result = self.model.transcribe(source, **kwargs) - - return str(result) - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - res = self.model.transcribe(s, **kwargs) - result.append(res) - - out = '' - for i, res in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(res) - out += "\n\n" - - return out - - else: - raise gr.Error("Please provide a valid audio file.") - - def perform_diarisation(self, source, num_speakers): - """ - Shortcut method for the Diarisation task. - - Returns: - str: JSON output of diarisation result. - """ - kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - } - - if isinstance(source, str): - try: - result = self.model.diarization(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - - return json.dumps(result, indent=2) - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Performing diarisation"): - try: - res = self.model.diarization(s, **kwargs) - except ValueError: - res = f"NO DIARISATION FOUND FOR {s}" - gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") - result.append(res) - - out = {} - - for i, res in enumerate(result): - out[source_names[i]] = res - - return json.dumps(out, indent=4) - - else: - gr.Error("Please provide a valid audio file.") - -#### -# Gradio Interface -#### - -def gradio_Interface(model : Scraibe = None, timeout = 1): - """ - Gradio Web interface for audio transcription. 
- - :param model: Scraibe model, defaults to None - :type model: Scraibe, optional - :param timeout: Time until model is unloaded, defaults to 600 seconds - :type timeout: int, optional - :return: Gradio Interface - :rtype: gradio.Interface - """ - - if model is None: - model = Scraibe() - - save_model_params = model.params - - pipe = GradioTranscriptionInterface(model) - - def select_task(choice): - # tell the app that it is still in use - reset_user_activity() - - if choice == 'Auto Transcribe': - - return (gr.update(visible = True), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Transcribe': - - return (gr.update(visible = False), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Diarisation': - - return (gr.update(visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - def select_origin(choice): - - # tell the app that it is still in use - reset_user_activity() - - if choice == "Upload Audio": - - return (gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Audio": - - return (gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Upload Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None)) - - elif choice == "File or Files": 
- - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True)) - - def run_scribe(task, - num_speakers, - translate, - language, - audio1, - audio2, - video1, - video2, - file_in, - progress = gr.Progress(track_tqdm= True)): - # get *args which are not None - - if not "model" in locals(): - gr.Warning("Model unloaded due to inactivity. Reloading the model, please wait.") - model = Scraibe(**save_model_params) - pipe = GradioTranscriptionInterface(model) - # # tell the app that it is still in use - reset_user_activity() - - progress(0, desc='Starting task...') - source = audio1 or audio2 or video1 or video2 or file_in - - if isinstance(source, list): - source = [s.name for s in source] - if len(source) == 1: - source = source[0] - - if task == 'Auto Transcribe': - - out_str , out_json = pipe.auto_transcribe(source = source, - num_speakers = num_speakers, - translation = translate, - language = language) - - if isinstance(source, str): - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = True), - gr.update(visible = True)) - else: - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Transcribe': - - out = pipe.transcribe(source = source, - translation = translate, - language = language) - - return (gr.update(value = out, visible = True), - gr.update(value = None, visible = False), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Diarisation': - - out = pipe.perform_diarisation(source = source, - num_speakers = num_speakers) - - return (gr.update(value = None, visible = False), - gr.update(value = out, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - def 
annotate_output(annoation : str, out_json : dict): - # get *args which are not None - - trans = Transcript.from_json(out_json) - trans = trans.annotate(*annoation.split(",")) - - return gr.update(value = str(trans)),gr.update(value = trans.get_json()) - - # Create a thread to monitor user activity - def monitor_activity(model, pipe, timeout=timeout): - global USER_ACTIVE - - while True: - time.sleep(timeout) # Check user activity every second - with user_active_lock: - - if not USER_ACTIVE: - del model - del pipe - print("Model deleted empty memory") - gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.") - break - USER_ACTIVE = False - - # Start the monitoring thread - activity_thread = threading.Thread(target=monitor_activity, args=(model, pipe)) - activity_thread.daemon = True - activity_thread.start() - - with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: - - # Define components - hname = os.path.join(CURRENT_PATH, "header.html") - header = open(hname, "r").read() - - # ugly hack to get the logo to work - header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) - - gr.HTML(header, visible= True, show_label=False) - - with gr.Row(): - - with gr.Column(): - - task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", - value= 'Auto Transcribe') - - num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", - info = "Number of speakers in the audio file. If you don't know,\ - leave it at 0.", visible= True) - - translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, - info="Select 'Yes' to have the output translated into English.", - visible= True) - - language = gr.Dropdown(LANGUAGES, - label="Language (optional)", value = "None", - info="Language of the audio file. 
If you don't know,\ - leave it at None.", visible= True) - - input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" - ,"File or Files"], label="Input Type", value="Upload Audio") - - audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", - interactive= True, visible= True) - audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", - interactive= True, visible= False) - video1 = gr.Video(source="upload", type="filepath", label="Upload Video", - interactive= True, visible= False) - video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True, - interactive= True, visible= False) - file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) - - submit = gr.Button() - - with gr.Column(): - - out_txt = gr.Textbox(label="Output", - visible= True, show_copy_button=True) - - out_json = gr.JSON(label="JSON Output", - visible= False, show_copy_button=True) - - annoation = gr.Textbox(label="Name your speaker's", - info= "Please provide a list of the speakers arranged \ - in the order in which they appear in the input. Use comma ',' \ - as a seperator. 
Be aware that the first name is given \ - to SPEAKER_00 the second to SPEAKER_01 and so on.", - visible= False, interactive= True) - - annotate = gr.Button(value="Annotate", visible= False, interactive= True) - - # Define usage of components - input.change(fn=select_origin, inputs=[input], - outputs=[audio1, audio2, video1, video2, file_in]) - - task.change(fn=select_task, inputs=[task], - outputs=[num_speakers, translate, language]) - - translate.change(fn= lambda x : gr.update(value = x), - inputs=[translate], outputs=[translate]) - num_speakers.change(fn= lambda x : gr.update(value = x), - inputs=[num_speakers], outputs=[num_speakers]) - language.change(fn= lambda x : gr.update(value = x), - inputs=[language], outputs=[language]) - - submit.click(fn = run_scribe, - inputs=[task, num_speakers, translate, language, audio1, - audio2, video1, video2, file_in], - outputs=[out_txt, out_json, annoation, annotate]) - - annotate.click(fn = annotate_output, inputs=[annoation, out_json], - outputs=[out_txt, out_json]) - - return demo - - -if __name__ == "__main__": - - gradio_Interface().queue().launch() \ No newline at end of file diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py new file mode 100644 index 0000000..10659c0 --- /dev/null +++ b/scraibe/app/interactions.py @@ -0,0 +1,145 @@ +""" +This file contains every function that is called when the user interacts with the +UI, such as pressing a button or uploading a file. 
+""" + +from math import pi +import gradio as gr +import scraibe.app.global_var as gv +from scraibe import Transcript + +def select_task(choice): + # tell the app that it is still in use + if choice == 'Auto Transcribe': + + return (gr.update(visible = True), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Transcribe': + + return (gr.update(visible = False), + gr.update(visible = True), + gr.update(visible = True)) + + + elif choice == 'Diarisation': + + return (gr.update(visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def select_origin(choice): + + # tell the app that it is still in use + if choice == "Upload Audio": + + return (gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Audio": + + return (gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Upload Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None)) + + elif choice == "Record Video": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True), + gr.update(visible = False, value = None)) + + elif choice == "File or Files": + + return (gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = False, value = None), + gr.update(visible = True)) + +def run_scraibe(task, + num_speakers, + translate, + language, + audio1, + audio2, + 
video1, + video2, + file_in, + progress = gr.Progress(track_tqdm= True)): + + # get *args which are not None + + pipe = gv.MODEL + + progress(0, desc='Starting task...') + source = audio1 or audio2 or video1 or video2 or file_in + + if isinstance(source, list): + source = [s.name for s in source] + if len(source) == 1: + source = source[0] + + if task == 'Auto Transcribe': + + out_str , out_json = pipe.auto_transcribe(source = source, + num_speakers = num_speakers, + translation = translate, + language = language) + + if isinstance(source, str): + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = True), + gr.update(visible = True)) + else: + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = False), + gr.update(visible = False)) + + elif task == 'Transcribe': + + out = pipe.transcribe(source = source, + translation = translate, + language = language) + + return (gr.update(value = out, visible = True), + gr.update(value = None, visible = False), + gr.update(visible = False), + gr.update(visible = False)) + + elif task == 'Diarisation': + + out = pipe.perform_diarisation(source = source, + num_speakers = num_speakers) + + return (gr.update(value = None, visible = False), + gr.update(value = out, visible = True), + gr.update(visible = False), + gr.update(visible = False)) + +def annotate_output(annoation : str, out_json : dict): + # get *args which are not None + + trans = Transcript.from_json(out_json) + trans = trans.annotate(*annoation.split(",")) + + return gr.update(value = str(trans)),gr.update(value = trans.get_json()) + diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py new file mode 100644 index 0000000..ef9d818 --- /dev/null +++ b/scraibe/app/interface.py @@ -0,0 +1,129 @@ +""" +This file contains the actual gradio Interface which is used to interact with the user. 
+""" + +import gradio as gr +import os + +import scraibe.app.global_var as gv +from .interactions import * +from .stg import * + +from scraibe import Scraibe + +theme = gr.themes.Soft( + primary_hue="green", + secondary_hue='orange', + neutral_hue="gray", +) + + +LANGUAGES = [ + "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", + "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", + "Czech", "Danish", "Dutch", "English", "Estonian", + "Finnish", "French", "Galician", "German", "Greek", + "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", + "Italian", "Japanese", "Kannada", "Kazakh", "Korean", + "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", + "Maori", "Nepali", "Norwegian", "Persian", "Polish", + "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", + "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", + "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", + "Vietnamese", "Welsh" +] + +CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) + + +def gradio_Interface(pipe : Scraibe = None): + + if pipe is not None: + gv.MODEL = GradioTranscriptionInterface(pipe) + + with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: + + # Define components + hname = os.path.join(CURRENT_PATH, "header.html") + header = open(hname, "r").read() + + # ugly hack to get the logo to work + header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) + + gr.HTML(header, visible= True, show_label=False) + + with gr.Row(): + + with gr.Column(): + + task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", + value= 'Auto Transcribe') + + num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", + info = "Number of speakers in the audio file. 
If you don't know,\ + leave it at 0.", visible= True) + + translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, + info="Select 'Yes' to have the output translated into English.", + visible= True) + + language = gr.Dropdown(LANGUAGES, + label="Language (optional)", value = "None", + info="Language of the audio file. If you don't know,\ + leave it at None.", visible= True) + + input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" + ,"File or Files"], label="Input Type", value="Upload Audio") + + audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", + interactive= True, visible= True) + audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", + interactive= True, visible= False) + video1 = gr.Video(source="upload", type="filepath", label="Upload Video", + interactive= True, visible= False) + video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True, + interactive= True, visible= False) + file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) + + submit = gr.Button() + + with gr.Column(): + + out_txt = gr.Textbox(label="Output", + visible= True, show_copy_button=True) + + out_json = gr.JSON(label="JSON Output", + visible= False, show_copy_button=True) + + annoation = gr.Textbox(label="Name your speaker's", + info= "Please provide a list of the speakers arranged \ + in the order in which they appear in the input. Use comma ',' \ + as a seperator. 
Be aware that the first name is given \ + to SPEAKER_00 the second to SPEAKER_01 and so on.", + visible= False, interactive= True) + + annotate = gr.Button(value="Annotate", visible= False, interactive= True) + + # Define usage of components + input.change(fn=select_origin, inputs=[input], + outputs=[audio1, audio2, video1, video2, file_in]) + + task.change(fn=select_task, inputs=[task], + outputs=[num_speakers, translate, language]) + + translate.change(fn= lambda x : gr.update(value = x), + inputs=[translate], outputs=[translate]) + num_speakers.change(fn= lambda x : gr.update(value = x), + inputs=[num_speakers], outputs=[num_speakers]) + language.change(fn= lambda x : gr.update(value = x), + inputs=[language], outputs=[language]) + + submit.click(fn = run_scraibe, + inputs=[task, num_speakers, translate, language, audio1, + audio2, video1, video2, file_in], + outputs=[out_txt, out_json, annoation, annotate]) + + annotate.click(fn = annotate_output, inputs=[annoation, out_json], + outputs=[out_txt, out_json]) + + return demo \ No newline at end of file diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py new file mode 100644 index 0000000..9b227a1 --- /dev/null +++ b/scraibe/app/stg.py @@ -0,0 +1,157 @@ +""" +stg - scraibe to gradio interface + +This file contains the code for the scraibe to gradio interface. +It makes adds gradio interactions to the scraibe class in the back. + +""" + +import json +import gradio as gr +from tqdm import tqdm +from scraibe import Scraibe + + +class GradioTranscriptionInterface: + """ + Interface handling the interaction between Gradio UI and the Audio Transcription system. + """ + + def __init__(self, model: Scraibe): + """ + Initializes the GradioTranscriptionInterface with a transcription model. + + Args: + model (Scraibe): Model responsible for audio transcription tasks. 
+ """ + self.model = model + + def auto_transcribe(self, source, + num_speakers : int, + translation : bool, + language : str): + """ + Shortcut method for the Scraibe task. + + Returns: + tuple: Transcribed text (str), JSON output (dict) + """ + + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + "language": language if language != "None" else None, + "task": 'translate' if translation else None + } + if isinstance(source, str): + try: + result = self.model.autotranscribe(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + + return str(result), result.get_json() + + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + try: + res = self.model.autotranscribe(s, **kwargs) + except ValueError: + _name = s.split("/")[-1] + res = f"NO TRANSCRIPT FOUND FOR {_name}" + gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") + result.append(res) + + out = '' + out_dict = {} + for i, r in enumerate(result): + out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" + out += str(r) + out += "\n\n" + + if isinstance(r, str): + out_dict[source_names[i]] = r + else: + out_dict[source_names[i]] = r.get_dict() + + return out, json.dumps(out_dict, indent=4) + + else: + raise gr.Error("Please provide a valid audio file.") + + + def transcribe(self, source, translation, language): + """ + Shortcut method for the Transcribe task. + + Returns: + str: Transcribed text. 
+ """ + kwargs = { + "language": language if language != "None" else None, + "task": 'translate' if translation == "Yes" else None + } + + if isinstance(source, str): + result = self.model.transcribe(source, **kwargs) + + return str(result) + + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + res = self.model.transcribe(s, **kwargs) + result.append(res) + + out = '' + for i, res in enumerate(result): + out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" + out += str(res) + out += "\n\n" + + return out + + else: + raise gr.Error("Please provide a valid audio file.") + + def perform_diarisation(self, source, num_speakers): + """ + Shortcut method for the Diarisation task. + + Returns: + str: JSON output of diarisation result. + """ + kwargs = { + "num_speakers": num_speakers if num_speakers != 0 else None, + } + + if isinstance(source, str): + try: + result = self.model.diarization(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. 
\ + Please try again!") + + return json.dumps(result, indent=2) + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Performing diarisation"): + try: + res = self.model.diarization(s, **kwargs) + except ValueError: + res = f"NO DIARISATION FOUND FOR {s}" + gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") + result.append(res) + + out = {} + + for i, res in enumerate(result): + out[source_names[i]] = res + + return json.dumps(out, indent=4) + + else: + gr.Error("Please provide a valid audio file.") From 93e5ce15f95dabe126501d982c88c5b928949cfe Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Sat, 25 Nov 2023 15:17:12 +0100 Subject: [PATCH 08/42] make gradio working with treads --- scraibe/app/__init__.py | 2 +- scraibe/app/global_var.py | 11 +++++++++- scraibe/app/interactions.py | 19 +++++++++++----- scraibe/app/interface.py | 7 +----- scraibe/app/multi.py | 44 +++++++++++++++++++++++++++++++++++++ scraibe/app/stg.py | 39 +++++++++++++++++++++++++------- 6 files changed, 101 insertions(+), 21 deletions(-) create mode 100644 scraibe/app/multi.py diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py index 9e04a48..fa8f8f7 100644 --- a/scraibe/app/__init__.py +++ b/scraibe/app/__init__.py @@ -1,5 +1,5 @@ from .qtfaststart import * -from .activity_tracker import * +from .multi import * from .interface import * from .stg import * from .interactions import * diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py index 191e3e6..6d8f3cf 100644 --- a/scraibe/app/global_var.py +++ b/scraibe/app/global_var.py @@ -3,7 +3,16 @@ Stores global variables for the app. 
""" # Global variable to store the model +from threading import Event + +import time + + MODEL = None +MODEL_THREAD_PARAMS = None +MODEL_THREAD = None # Global variable to track user activity -USER_ACTIVE = False \ No newline at end of file +LAST_USED = time.time() +TIMEOUT = 30 #seconds +TRANSCRIBE_ACTIVE = Event() \ No newline at end of file diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py index 10659c0..6151d64 100644 --- a/scraibe/app/interactions.py +++ b/scraibe/app/interactions.py @@ -3,10 +3,12 @@ This file contains ervery function that will be called when the user interacts w UI like pressing a button or uploading a file. """ -from math import pi +import time import gradio as gr import scraibe.app.global_var as gv from scraibe import Transcript +from scraibe.app.stg import GradioTranscriptionInterface +import threading def select_task(choice): # tell the app that it is still in use @@ -84,11 +86,18 @@ def run_scraibe(task, file_in, progress = gr.Progress(track_tqdm= True)): - # get *args which are not None + # get *args which are not None - pipe = gv.MODEL - - progress(0, desc='Starting task...') + if gv.MODEL is None and gv.MODEL_THREAD_PARAMS is not None: + progress(0, desc='Model was not loaded to conserve resources. 
Loading model...') + time.sleep(1) + gv.MODEL_THREAD = threading.Thread(**gv.MODEL_THREAD_PARAMS) + gv.MODEL_THREAD.start() + gv.MODEL_THREAD.join() + + pipe = GradioTranscriptionInterface() + + progress(0.1, desc='Starting task...') source = audio1 or audio2 or video1 or video2 or file_in if isinstance(source, list): diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py index ef9d818..ddf10ee 100644 --- a/scraibe/app/interface.py +++ b/scraibe/app/interface.py @@ -9,8 +9,6 @@ import scraibe.app.global_var as gv from .interactions import * from .stg import * -from scraibe import Scraibe - theme = gr.themes.Soft( primary_hue="green", secondary_hue='orange', @@ -36,10 +34,7 @@ LANGUAGES = [ CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) -def gradio_Interface(pipe : Scraibe = None): - - if pipe is not None: - gv.MODEL = GradioTranscriptionInterface(pipe) +def gradio_Interface(): with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py new file mode 100644 index 0000000..4aa0c09 --- /dev/null +++ b/scraibe/app/multi.py @@ -0,0 +1,44 @@ +""" +This file contains the functions which are related to monitoring the actual app usage. +Therefore, the app is to be more efficient in the usage of the resources. +By for example, unloading or reloading the model. 
+""" + +import time +import gc +from typing import Union +import torch + +import scraibe.app.global_var as gv +from scraibe.autotranscript import Scraibe + + +def load_model_thread(model : Union[Scraibe, dict] = None): + if model is None: + gv.MODEL = Scraibe() + elif type(model) is Scraibe: + gv.MODEL = model + elif type(model) is dict: + gv.MODEL = Scraibe(**model) + else: + raise TypeError("model must be of type Scraibe, or dict") + + gv.LAST_USED = time.time() + +# Create a thread to monitor user activity +def delete_unused_model(): + while True: + + _unload_porperty = (not gv.TRANSCRIBE_ACTIVE.is_set() and (time.time() - gv.LAST_USED > gv.TIMEOUT) and gv.MODEL is not None) + + if _unload_porperty: + + del gv.MODEL + gv.MODEL = None + + gc.collect() + torch.cuda.empty_cache() + + gv.MODEL_THREAD.join() + + time.sleep(int(gv.TIMEOUT/5)) diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py index 9b227a1..0215903 100644 --- a/scraibe/app/stg.py +++ b/scraibe/app/stg.py @@ -9,7 +9,8 @@ It makes adds gradio interactions to the scraibe class in the back. import json import gradio as gr from tqdm import tqdm -from scraibe import Scraibe + +import scraibe.app.global_var as gv class GradioTranscriptionInterface: @@ -17,14 +18,14 @@ class GradioTranscriptionInterface: Interface handling the interaction between Gradio UI and the Audio Transcription system. """ - def __init__(self, model: Scraibe): + def __init__(self): """ Initializes the GradioTranscriptionInterface with a transcription model. Args: model (Scraibe): Model responsible for audio transcription tasks. 
""" - self.model = model + self.model = gv.MODEL def auto_transcribe(self, source, num_speakers : int, @@ -37,6 +38,8 @@ class GradioTranscriptionInterface: tuple: Transcribed text (str), JSON output (dict) """ + gv.TRANSCRIBE_ACTIVE.set() + kwargs = { "num_speakers": num_speakers if num_speakers != 0 else None, "language": language if language != "None" else None, @@ -46,9 +49,11 @@ class GradioTranscriptionInterface: try: result = self.model.autotranscribe(source, **kwargs) except ValueError: + gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Couldn't detect any speech in the provided audio. \ Please try again!") - + + gv.TRANSCRIBE_ACTIVE.clear() return str(result), result.get_json() elif isinstance(source, list): @@ -74,10 +79,14 @@ class GradioTranscriptionInterface: out_dict[source_names[i]] = r else: out_dict[source_names[i]] = r.get_dict() + + + gv.TRANSCRIBE_ACTIVE.clear() return out, json.dumps(out_dict, indent=4) else: + gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Please provide a valid audio file.") @@ -88,14 +97,17 @@ class GradioTranscriptionInterface: Returns: str: Transcribed text. """ + + gv.TRANSCRIBE_ACTIVE.set() + kwargs = { "language": language if language != "None" else None, "task": 'translate' if translation == "Yes" else None } - + if isinstance(source, str): result = self.model.transcribe(source, **kwargs) - + gv.TRANSCRIBE_ACTIVE.clear() return str(result) elif isinstance(source, list): @@ -111,9 +123,12 @@ class GradioTranscriptionInterface: out += str(res) out += "\n\n" + gv.TRANSCRIBE_ACTIVE.clear() + return out else: + gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Please provide a valid audio file.") def perform_diarisation(self, source, num_speakers): @@ -123,6 +138,9 @@ class GradioTranscriptionInterface: Returns: str: JSON output of diarisation result. 
""" + + gv.TRANSCRIBE_ACTIVE.set() + kwargs = { "num_speakers": num_speakers if num_speakers != 0 else None, } @@ -131,9 +149,10 @@ class GradioTranscriptionInterface: try: result = self.model.diarization(source, **kwargs) except ValueError: + gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Couldn't detect any speech in the provided audio. \ Please try again!") - + gv.TRANSCRIBE_ACTIVE.clear() return json.dumps(result, indent=2) elif isinstance(source, list): source_names = [s.split("/")[-1] for s in source] @@ -142,6 +161,7 @@ class GradioTranscriptionInterface: try: res = self.model.diarization(s, **kwargs) except ValueError: + res = f"NO DIARISATION FOUND FOR {s}" gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") result.append(res) @@ -150,8 +170,11 @@ class GradioTranscriptionInterface: for i, res in enumerate(result): out[source_names[i]] = res - + + gv.TRANSCRIBE_ACTIVE.clear() + return json.dumps(out, indent=4) else: + gv.TRANSCRIBE_ACTIVE.clear() gr.Error("Please provide a valid audio file.") From db435c1fddf4d451e4946e8fe21ede751eda1d62 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Sat, 25 Nov 2023 15:18:09 +0100 Subject: [PATCH 09/42] removed file --- scraibe/app/activity_tracker.py | 37 --------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 scraibe/app/activity_tracker.py diff --git a/scraibe/app/activity_tracker.py b/scraibe/app/activity_tracker.py deleted file mode 100644 index 5cced3b..0000000 --- a/scraibe/app/activity_tracker.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -This file contains the functions which are related to monitoring the actual app usage. -Therefore, the app is to be more efficient in the usage of the resources. -By for example, unloading or reloading the model. 
-""" -import time -import threading -import torch -import gc -import gradio as gr - - -timeout = 30 #seconds -USER_ACTIVE = True -user_active_lock = threading.Lock() # dummy for now - -# Create a thread to monitor user activity -def monitor_activity(model, pipe, timeout=timeout): - global USER_ACTIVE - - while True: - time.sleep(timeout) # Check user activity every second - with user_active_lock: - - if not USER_ACTIVE: - del model - del pipe - - gc.collect() - torch.cuda.empty_cache() - - - - print("Model deleted empty memory") - gr.Warning("Model unloaded due to inactivity. Please reload the model to continue.") - break - USER_ACTIVE = False \ No newline at end of file From 32b27442e61078c5ea89b0208fc5537f8fb27d1d Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Sat, 25 Nov 2023 16:38:13 +0100 Subject: [PATCH 10/42] testing if multiprosessing works better --- test_multiprocessing.py | 105 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 test_multiprocessing.py diff --git a/test_multiprocessing.py b/test_multiprocessing.py new file mode 100644 index 0000000..ad9edc2 --- /dev/null +++ b/test_multiprocessing.py @@ -0,0 +1,105 @@ +import multiprocessing +import os +import threading +import queue +import time +import torch +from scraibe import Scraibe + +def input_thread(input_queue, processed_event): + while True: + processed_event.wait() # Wait for the previous input to be processed + processed_event.clear() # Clear the event for the next input + inp = input("Enter the path to the audio file ('q' to quit, 'reload' to reload model): ") + input_queue.put(inp) + +def clear_queue(queue): + while not queue.empty(): + try: + queue.get_nowait() + except queue.Empty: + continue + +def model_worker(request_queue, last_active_time, response_queue,loaded_event, running_event): + + loaded_event.set() + + model = Scraibe(dia_model="models/pyannote/config.yaml") + + while True: + audio_path = request_queue.get() + if audio_path == 
"STOP": + break + running_event.set() + transcription = model.autotranscribe(audio_path) + running_event.clear() + response_queue.put(transcription) + last_active_time.value = time.time() + + del model + torch.cuda.empty_cache() + clear_queue(request_queue) + clear_queue(response_queue) + loaded_event.clear() + + +def start_model_worker(request_queue, last_active_time, response_queue,loaded_event, running_event): + model_process = multiprocessing.Process(target=model_worker, args=(request_queue, last_active_time, response_queue,loaded_event, running_event)) + model_process.start() + return model_process + +def timer_thread(request_queue, last_active_time,loaded_event, running_event, timeout=30): + while True: + time.sleep(timeout) + + if time.time() - last_active_time.value > timeout and loaded_event.is_set() and not running_event.is_set(): + print(f"No activity for the last {timeout} seconds. Stopping the model worker.", flush=True) + request_queue.put("STOP") + +if __name__ == "__main__": + request_queue = multiprocessing.Queue() + response_queue = multiprocessing.Queue() + input_queue = queue.Queue() + last_active_time = multiprocessing.Value('d', time.time()) + loaded_event = multiprocessing.Event() + running_event = multiprocessing.Event() + + processed_event = multiprocessing.Event() + processed_event.set() # Initially set to allow the first input + + model_process = start_model_worker(request_queue, last_active_time, response_queue,loaded_event ,running_event) + timer = threading.Thread(target=timer_thread, args=(request_queue, last_active_time, loaded_event, running_event), daemon=True) + input_handler = threading.Thread(target=input_thread, args=(input_queue,processed_event)) + + timer.start() + input_handler.start() + + while True: + + audio_file_path = input_queue.get() # Get input from the input thread + print(audio_file_path) + + if audio_file_path.lower() == 'q': + request_queue.put("STOP") + model_process.join() + break + elif audio_file_path.lower() 
== 'reload': + if loaded_event.is_set(): + request_queue.put("STOP") + model_process.join() + model_process = start_model_worker(request_queue, last_active_time, response_queue, loaded_event, running_event) + print("Model reloaded.") + elif not os.path.exists(audio_file_path): + print("File does not exist.") + else: + if not loaded_event.is_set(): + model_process = start_model_worker(request_queue, last_active_time, response_queue, loaded_event, running_event) + request_queue.put(audio_file_path) + transcription = response_queue.get() + print(transcription) + + processed_event.set() # Signal that the input has been processed + + model_process.join() + timer.join() + input_handler.join() From 9eb9f5af8d8a531d431bb3ae3d7dd8cf6b0f16ec Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Thu, 7 Dec 2023 16:22:52 +0100 Subject: [PATCH 11/42] Make everything work in processes and adding config to customize instance --- scraibe/app/config.yml | 48 +++++++++++++++++++ scraibe/app/global_var.py | 20 +++++--- scraibe/app/interactions.py | 49 ++++++++++---------- scraibe/app/multi.py | 91 +++++++++++++++++++++++++++---------- scraibe/app/stg.py | 87 ++++++++++++++++++----------------- scraibe/app/utils.py | 42 +++++++++++++++++ 6 files changed, 241 insertions(+), 96 deletions(-) create mode 100644 scraibe/app/config.yml create mode 100644 scraibe/app/utils.py diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml new file mode 100644 index 0000000..16d296c --- /dev/null +++ b/scraibe/app/config.yml @@ -0,0 +1,48 @@ +launch: + # The following are the default values for the launch configuration + # for more informations look at https://www.gradio.app/docs/interface + server_port: 8080 + server_name: 0.0.0.0 + inbrowser: true + inline: false + max-threads: 40 + quiet: false + auth: + enabled: false + username: admin + password: admin + auth_message: "Please enter your credentials" + show_error : false + favicon_path : null + ssl_keyfile : null + ssl_certfile : null + 
ssl_keyfile_password : null + ssl_verify : false + quiet : false + show_api : false + allowed_paths : null + blocked_paths : null + root_path : null + app_kwargs : null + state_session_capacity : 1000 + share_server_address : null + share_server_protocol : null + share : false + debug : false +queue: + # The following are the default values for the queue configuration + # for more informations look at hhttps://www.gradio.app/docs/interface + status_update_rate : 'auto' + api_open : null + max_size : null + concurrency_count : null + default_concurrency_limit : 'not_set' +layout: + header: scraibe/app/header.html + footer: null + logo: scraibe/app/logo.svg +model: + whisper_model : null + dia_model: null +advanced: + timeout: 300 #seconds e.g. 5 minutes diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py index 6d8f3cf..99f6eea 100644 --- a/scraibe/app/global_var.py +++ b/scraibe/app/global_var.py @@ -3,16 +3,22 @@ Stores global variables for the app. """ # Global variable to store the model -from threading import Event - +import multiprocessing +import os import time +import yaml +REQUEST_QUEUE = multiprocessing.Queue() # audio file path as string +RESPONSE_QUEUE = multiprocessing.Queue() # transcription as string +LAST_ACTIVE_TIME = multiprocessing.Value('d', time.time()) # time of last activity +LOADED_EVENT = multiprocessing.Event() # model loaded event +RUNNING_EVENT = multiprocessing.Event() # model running event -MODEL = None -MODEL_THREAD_PARAMS = None -MODEL_THREAD = None +MODEL_PARAMS = None # model parameters +MODEL_PROCESS = None # model process to handle globally # Global variable to track user activity LAST_USED = time.time() -TIMEOUT = 30 #seconds -TRANSCRIBE_ACTIVE = Event() \ No newline at end of file +TIMEOUT = None #seconds + +DEFAULT_APP_CONIFG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.yml") diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py index 6151d64..1719388 100644 --- 
a/scraibe/app/interactions.py +++ b/scraibe/app/interactions.py @@ -3,12 +3,12 @@ This file contains ervery function that will be called when the user interacts w UI like pressing a button or uploading a file. """ +from re import M import time import gradio as gr import scraibe.app.global_var as gv from scraibe import Transcript -from scraibe.app.stg import GradioTranscriptionInterface -import threading +from .multi import start_model_worker def select_task(choice): # tell the app that it is still in use @@ -84,33 +84,37 @@ def run_scraibe(task, video1, video2, file_in, - progress = gr.Progress(track_tqdm= True)): + progress = gr.Progress(track_tqdm=False)): # get *args which are not None + if gv.MODEL_PROCESS is None or not gv.MODEL_PROCESS.is_alive(): + #progress(0.0, desc='Loading model...') + gv.MODEL_PROCESS = start_model_worker(gv.MODEL_PARAMS, + gv.REQUEST_QUEUE, + gv.LAST_ACTIVE_TIME, + gv.RESPONSE_QUEUE, + gv.LOADED_EVENT, + gv.RUNNING_EVENT) - if gv.MODEL is None and gv.MODEL_THREAD_PARAMS is not None: - progress(0, desc='Model was not loaded to conserve resources. 
Loading model...') - time.sleep(1) - gv.MODEL_THREAD = threading.Thread(**gv.MODEL_THREAD_PARAMS) - gv.MODEL_THREAD.start() - gv.MODEL_THREAD.join() - - pipe = GradioTranscriptionInterface() - - progress(0.1, desc='Starting task...') + # progress(0.1, desc='Starting task...') source = audio1 or audio2 or video1 or video2 or file_in if isinstance(source, list): source = [s.name for s in source] if len(source) == 1: source = source[0] - + + config = dict(source = source, + task = task, + num_speakers = num_speakers, + translate = translate, + language = language) + + gv.REQUEST_QUEUE.put(config) + if task == 'Auto Transcribe': - - out_str , out_json = pipe.auto_transcribe(source = source, - num_speakers = num_speakers, - translation = translate, - language = language) + + out_str , out_json = gv.RESPONSE_QUEUE.get() if isinstance(source, str): return (gr.update(value = out_str, visible = True), @@ -125,9 +129,7 @@ def run_scraibe(task, elif task == 'Transcribe': - out = pipe.transcribe(source = source, - translation = translate, - language = language) + out = gv.RESPONSE_QUEUE.get() return (gr.update(value = out, visible = True), gr.update(value = None, visible = False), @@ -136,8 +138,7 @@ def run_scraibe(task, elif task == 'Diarisation': - out = pipe.perform_diarisation(source = source, - num_speakers = num_speakers) + out = gv.RESPONSE_QUEUE.get() return (gr.update(value = None, visible = False), gr.update(value = out, visible = True), diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py index 4aa0c09..17fd1bb 100644 --- a/scraibe/app/multi.py +++ b/scraibe/app/multi.py @@ -4,41 +4,86 @@ Therefore, the app is to be more efficient in the usage of the resources. By for example, unloading or reloading the model. 
""" + + import time import gc from typing import Union +import multiprocessing import torch +import signal -import scraibe.app.global_var as gv -from scraibe.autotranscript import Scraibe +from gradio import Warning +from scraibe.autotranscript import Scraibe +from .stg import GradioTranscriptionInterface + +def init_worker(): + signal.signal(signal.SIGINT, signal.SIG_IGN) -def load_model_thread(model : Union[Scraibe, dict] = None): - if model is None: - gv.MODEL = Scraibe() - elif type(model) is Scraibe: - gv.MODEL = model - elif type(model) is dict: - gv.MODEL = Scraibe(**model) +def clear_queue(queue): + while not queue.empty(): + try: + queue.get_nowait() + except queue.Empty: + continue + +def model_worker(model_params : Union[Scraibe, dict], + request_queue, + last_active_time, + response_queue, + loaded_event, + running_event, + *args, **kwargs): + + loaded_event.set() + + if model_params is None: + _model = Scraibe() + elif type(model_params) is Scraibe: + _model = model_params + elif type(model_params) is dict: + _model = Scraibe(**model_params) else: raise TypeError("model must be of type Scraibe, or dict") - gv.LAST_USED = time.time() - -# Create a thread to monitor user activity -def delete_unused_model(): + model = GradioTranscriptionInterface(_model) + while True: - _unload_porperty = (not gv.TRANSCRIBE_ACTIVE.is_set() and (time.time() - gv.LAST_USED > gv.TIMEOUT) and gv.MODEL is not None) + req = request_queue.get() - if _unload_porperty: + if req == "STOP": - del gv.MODEL - gv.MODEL = None - - gc.collect() - torch.cuda.empty_cache() + break + elif type(req) is dict: + runner = model.get_task_from_str(req.pop("task")) + running_event.set() + transcription = runner(**req) + running_event.clear() + response_queue.put(transcription) + last_active_time.value = time.time() + else: + raise TypeError("request must be of type dict") - gv.MODEL_THREAD.join() - - time.sleep(int(gv.TIMEOUT/5)) + del model + torch.cuda.empty_cache() + gc.collect() + 
clear_queue(request_queue) + clear_queue(response_queue) + loaded_event.clear() + +def start_model_worker(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args, **kwargs): + context = multiprocessing.get_context('spawn') + model_process = context.Process(target=model_worker, args=(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args), kwargs=kwargs) + model_process.start() + return model_process + +def timer_thread(request_queue, last_active_time,loaded_event, running_event, timeout=30): + while True: + time.sleep(timeout) + + if time.time() - last_active_time.value > timeout and loaded_event.is_set() and not running_event.is_set(): + print(f"No activity for the last {timeout} seconds. Stopping the model worker.", flush=True) + request_queue.put("STOP") + Warning("Model worker stopped due to inactivity.") \ No newline at end of file diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py index 0215903..1b9caf7 100644 --- a/scraibe/app/stg.py +++ b/scraibe/app/stg.py @@ -18,19 +18,19 @@ class GradioTranscriptionInterface: Interface handling the interaction between Gradio UI and the Audio Transcription system. """ - def __init__(self): + def __init__(self, model): """ Initializes the GradioTranscriptionInterface with a transcription model. Args: model (Scraibe): Model responsible for audio transcription tasks. """ - self.model = gv.MODEL + self.model = model - def auto_transcribe(self, source, + def autotranscribe(self, source, num_speakers : int, - translation : bool, - language : str): + translate : bool, + language : str,*args ,**kwargs): """ Shortcut method for the Scraibe task. 
@@ -38,22 +38,18 @@ class GradioTranscriptionInterface: tuple: Transcribed text (str), JSON output (dict) """ - gv.TRANSCRIBE_ACTIVE.set() - - kwargs = { + _kwargs = { "num_speakers": num_speakers if num_speakers != 0 else None, "language": language if language != "None" else None, - "task": 'translate' if translation else None + "task": 'translate' if translate else None } if isinstance(source, str): try: - result = self.model.autotranscribe(source, **kwargs) + result = self.model.autotranscribe(source, **_kwargs) except ValueError: - gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Couldn't detect any speech in the provided audio. \ Please try again!") - - gv.TRANSCRIBE_ACTIVE.clear() + return str(result), result.get_json() elif isinstance(source, list): @@ -61,7 +57,7 @@ class GradioTranscriptionInterface: result = [] for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): try: - res = self.model.autotranscribe(s, **kwargs) + res = self.model.autotranscribe(s, **_kwargs) except ValueError: _name = s.split("/")[-1] res = f"NO TRANSCRIPT FOUND FOR {_name}" @@ -79,42 +75,36 @@ class GradioTranscriptionInterface: out_dict[source_names[i]] = r else: out_dict[source_names[i]] = r.get_dict() - - - gv.TRANSCRIBE_ACTIVE.clear() return out, json.dumps(out_dict, indent=4) else: - gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Please provide a valid audio file.") - def transcribe(self, source, translation, language): + def transcribe(self, source, translate, language,*args ,**kwargs): """ Shortcut method for the Transcribe task. Returns: str: Transcribed text. 
""" - - gv.TRANSCRIBE_ACTIVE.set() - - kwargs = { + + _kwargs = { "language": language if language != "None" else None, - "task": 'translate' if translation == "Yes" else None + "task": 'translate' if translate == "Yes" else None } if isinstance(source, str): - result = self.model.transcribe(source, **kwargs) - gv.TRANSCRIBE_ACTIVE.clear() + result = self.model.transcribe(source, **_kwargs) + return str(result) elif isinstance(source, list): source_names = [s.split("/")[-1] for s in source] result = [] for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - res = self.model.transcribe(s, **kwargs) + res = self.model.transcribe(s, **_kwargs) result.append(res) out = '' @@ -123,15 +113,12 @@ class GradioTranscriptionInterface: out += str(res) out += "\n\n" - gv.TRANSCRIBE_ACTIVE.clear() - return out else: - gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Please provide a valid audio file.") - def perform_diarisation(self, source, num_speakers): + def diarisation(self, source, num_speakers, *args ,**kwargs): """ Shortcut method for the Diarisation task. @@ -139,27 +126,24 @@ class GradioTranscriptionInterface: str: JSON output of diarisation result. """ - gv.TRANSCRIBE_ACTIVE.set() - - kwargs = { + _kwargs = { "num_speakers": num_speakers if num_speakers != 0 else None, } if isinstance(source, str): try: - result = self.model.diarization(source, **kwargs) + result = self.model.diarization(source, **_kwargs) except ValueError: - gv.TRANSCRIBE_ACTIVE.clear() raise gr.Error("Couldn't detect any speech in the provided audio. 
\ Please try again!") - gv.TRANSCRIBE_ACTIVE.clear() + return json.dumps(result, indent=2) elif isinstance(source, list): source_names = [s.split("/")[-1] for s in source] result = [] for s in tqdm(source, total=len(source),desc = "Performing diarisation"): try: - res = self.model.diarization(s, **kwargs) + res = self.model.diarization(s, **_kwargs) except ValueError: res = f"NO DIARISATION FOUND FOR {s}" @@ -171,10 +155,29 @@ class GradioTranscriptionInterface: for i, res in enumerate(result): out[source_names[i]] = res - gv.TRANSCRIBE_ACTIVE.clear() - return json.dumps(out, indent=4) else: - gv.TRANSCRIBE_ACTIVE.clear() - gr.Error("Please provide a valid audio file.") + gr.Error("Please provide a valid audio file.") + + def get_task_from_str(self, task): + """ + Returns the coresponing task function based on the task string. + + params: + task (str): Task string. Can be one of the following: + - 'Auto Transcribe' + - 'Transcribe' + - 'Diarisation' + """ + + if task == 'Auto Transcribe': + return self.autotranscribe + elif task == 'Transcribe': + return self.transcribe + elif task == 'Diarisation': + return self.diarisation + else: + raise ValueError("Invalid task string.") + + diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py new file mode 100644 index 0000000..b41a88f --- /dev/null +++ b/scraibe/app/utils.py @@ -0,0 +1,42 @@ +import scraibe.app.global_var as gv +import yaml + +def load_config(original_config_path = gv.DEFAULT_APP_CONIFG_PATH, override_yaml_path=None, **kwargs): + + + # Load the original configuration + with open(original_config_path, 'r') as file: + config = yaml.safe_load(file) + + # Override with another YAML file if provided + if override_yaml_path: + with open(override_yaml_path, 'r') as file: + override_config = yaml.safe_load(file) + apply_overrides(config, override_config) + + # Apply overrides from kwargs + apply_overrides(config, kwargs) + + return config + +def apply_overrides(orig_dict, override_dict): + """ Recursively apply 
overrides to the configuration. """ + for key, value in override_dict.items(): + if isinstance(value, dict): + # If the value is a dict, apply recursively + apply_overrides(orig_dict.get(key, {}), value) + else: + # If the value is not a dict, search for the key and update + if update_nested_key(orig_dict, key, value): + continue # Key was found and updated + orig_dict[key] = value # Key not found, update at this level + +def update_nested_key(d, key, value): + """ Recursively search and update the key in nested dictionary. """ + if key in d: + d[key] = value + return True + for k, v in d.items(): + if isinstance(v, dict) and update_nested_key(v, key, value): + return True + return False \ No newline at end of file From 4379d1e185be9bcaaddbc3ef4704fe5686dab532 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 8 Dec 2023 14:30:26 +0100 Subject: [PATCH 12/42] removes signal --- scraibe/app/multi.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py index 17fd1bb..ce61f70 100644 --- a/scraibe/app/multi.py +++ b/scraibe/app/multi.py @@ -11,16 +11,11 @@ import gc from typing import Union import multiprocessing import torch -import signal from gradio import Warning from scraibe.autotranscript import Scraibe from .stg import GradioTranscriptionInterface -def init_worker(): - signal.signal(signal.SIGINT, signal.SIG_IGN) - - def clear_queue(queue): while not queue.empty(): try: From 42f86ba2ca7e4682f34471e754483b2320e40bb4 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 8 Dec 2023 14:30:43 +0100 Subject: [PATCH 13/42] changed pyannote path --- scraibe/app/config.yml | 2 +- scraibe/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index 16d296c..4eda2e4 100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -11,7 +11,7 @@ launch: enabled: false username: admin password: admin - auth_message: "Please enter your credentials" + 
auth_message: null show_error : false favicon_path : null ssl_keyfile : null diff --git a/scraibe/cli.py b/scraibe/cli.py index b05da92..c023f38 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -9,7 +9,7 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import json from .autotranscript import Scraibe -from .app.gradio_app import gradio_Interface +from .app.app import gradio_Interface from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE from torch.cuda import is_available From 4ca9aa195a327a58a048792d9260b04ff6e24463 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 8 Dec 2023 14:30:56 +0100 Subject: [PATCH 14/42] added utils file --- scraibe/app/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py index fa8f8f7..a38ad86 100644 --- a/scraibe/app/__init__.py +++ b/scraibe/app/__init__.py @@ -4,4 +4,5 @@ from .interface import * from .stg import * from .interactions import * from .global_var import * +from .utils import * from .app import * \ No newline at end of file From f494895376d35992fe3a1c8256263c1d776d8fb7 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 8 Dec 2023 14:31:17 +0100 Subject: [PATCH 15/42] removes depencency --- scraibe/app/global_var.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py index 99f6eea..5599320 100644 --- a/scraibe/app/global_var.py +++ b/scraibe/app/global_var.py @@ -6,7 +6,6 @@ Stores global variables for the app. 
import multiprocessing import os import time -import yaml REQUEST_QUEUE = multiprocessing.Queue() # audio file path as string RESPONSE_QUEUE = multiprocessing.Queue() # transcription as string @@ -22,3 +21,4 @@ LAST_USED = time.time() TIMEOUT = None #seconds DEFAULT_APP_CONIFG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.yml") + From 9a8cdb2a64061dead12de67aac7d64ae2e7ba096 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 8 Dec 2023 14:31:48 +0100 Subject: [PATCH 16/42] added utils to handle config file (does not work yet) --- scraibe/app/utils.py | 208 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 175 insertions(+), 33 deletions(-) diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index b41a88f..cff8b9e 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -1,42 +1,184 @@ -import scraibe.app.global_var as gv +import os +import warnings import yaml -def load_config(original_config_path = gv.DEFAULT_APP_CONIFG_PATH, override_yaml_path=None, **kwargs): +import scraibe.app.global_var as gv + + +class ConfigLoader: + def __init__(self, config): + + self.config = config + def restore_defaults_for_keys(self, keys): + """ + Restores specified keys to their default values, including nested keys. + + Args: + keys (list): A list of keys or paths to keys (for nested dictionaries) to restore to default values. + Each key or path should be a list of keys leading to the desired key. + """ + default_config = self.get_default_config() + + + self.apply_overrides(self.config, default_config, keys) + + + + @classmethod + def load_config(cls, yaml_path = None, **kwargs): + """ + Load the configuration file and apply overrides. + + Args: + yaml_path (str): Path to the YAML file containing overrides. + **kwargs: Additional overrides as keyword arguments. + + Returns: + Config: A Config object with the loaded configuration. 
+ """ + + # Load the original configuration + config = cls.get_default_config() - # Load the original configuration - with open(original_config_path, 'r') as file: - config = yaml.safe_load(file) + # Override with another YAML file if provided + if yaml_path: + with open(yaml_path, 'r') as file: + override_config = yaml.safe_load(file) + cls.apply_overrides(config, override_config) - # Override with another YAML file if provided - if override_yaml_path: - with open(override_yaml_path, 'r') as file: - override_config = yaml.safe_load(file) - apply_overrides(config, override_config) + # Apply overrides from kwargs + cls.apply_overrides(config, kwargs) + return cls(config) + + @staticmethod + def apply_overrides(orig_dict, override_dict, specific_keys=None): + """ Recursively apply overrides to the configuration, only for specific keys. """ + if specific_keys is None: + specific_keys = override_dict.keys() # If no specific keys provided, apply to all keys - # Apply overrides from kwargs - apply_overrides(config, kwargs) + + for key, value in override_dict.items(): + + if key not in specific_keys: + + continue # Skip keys not in the specific keys set + + if isinstance(value, dict): + # If the value is a dict, apply recursively + sub_dict = orig_dict.get(key, {}) + ConfigLoader.apply_overrides(sub_dict, value, specific_keys) + orig_dict[key] = sub_dict + else: + # Apply override for this key + print("HI iam here", key, value) + orig_dict[key] = value + print("HI", orig_dict) - return config + # @staticmethod + # def apply_overrides(orig_dict, override_dict): + + # """ Recursively apply overrides to the configuration. 
""" + # for key, value in override_dict.items(): + # if isinstance(value, dict): + # # If the value is a dict, apply recursively + # ConfigLoader.apply_overrides(orig_dict.get(key, {}), value) + # else: + # # If the value is not a dict, search for the key and update + # if ConfigLoader.update_nested_key(orig_dict, key, value): + # continue # Key was found and updated + # orig_dict[key] = value # Key not found, update at this level -def apply_overrides(orig_dict, override_dict): - """ Recursively apply overrides to the configuration. """ - for key, value in override_dict.items(): - if isinstance(value, dict): - # If the value is a dict, apply recursively - apply_overrides(orig_dict.get(key, {}), value) - else: - # If the value is not a dict, search for the key and update - if update_nested_key(orig_dict, key, value): - continue # Key was found and updated - orig_dict[key] = value # Key not found, update at this level - -def update_nested_key(d, key, value): - """ Recursively search and update the key in nested dictionary. """ - if key in d: - d[key] = value - return True - for k, v in d.items(): - if isinstance(v, dict) and update_nested_key(v, key, value): + @staticmethod + def update_nested_key(d, key, value): + """ Recursively search and update the key in nested dictionary. """ + + if key in d: + d[key] = value return True - return False \ No newline at end of file + for k, v in d.items(): + if isinstance(v, dict) and ConfigLoader.update_nested_key(v, key, value): + return True + return False + + @staticmethod + def get_default_config(): + """ Return the default configuration. 
""" + with open(gv.DEFAULT_APP_CONIFG_PATH , 'r') as file: + config = yaml.safe_load(file) + return config + + +class AppConfig(ConfigLoader): + + def __init__(self, config): + + self.config = config + + self.set_global_vars_from_config() + self.set_launch_options() + self.set_layout_options() + + self.lauch = self.config.get("launch") + self.model = self.config.get("model") + self.advanced = self.config.get("advanced") + self.queue = self.config.get("queue") + self.layout = self.config.get("layout") + + def set_global_vars_from_config(self): + """ + Sets the global variables from a configuration dictionary. + + Args: + config (dict): A dictionary containing the parameters for the model. Modify the default parameters in the config.yml file. + + Returns: + None + + """ + + gv.MODEL_PARAMS = self.config.get('model') + gv.TIMEOUT = self.config.get("advanced").get('timeout') + + def set_launch_options(self): + + launch_options = self.config.get("launch") + + if launch_options.get('auth').pop('enabled'): + self.config['launch']['auth'] = (launch_options.get('auth').pop('username'), + launch_options.get('auth').pop('password')) + else: + self.config['launch']['auth'] = None + + def set_layout_options(self): + self.config['layout']['header'] = self.check_and_set_path(self.config['layout'], 'header') + self.config['layout']['footer'] = self.check_and_set_path(self.config['layout'], 'footer') + self.config['layout']['logo'] = self.check_and_set_path(self.config['layout'], 'logo') + + + @staticmethod + def check_and_set_path(config_item, key): + """ + Check if the file exists at the given path. If not, try with CURRENT_PATH. + Raise FileNotFoundError if the file still doesn't exist. 
+ """ + _current_path = os.path.dirname(os.path.realpath(__file__)) # Define your CURRENT_PATH + + file_path = config_item.get(key) + if file_path is None: + return None + if not os.path.exists(file_path): + new_path = os.path.join(_current_path, file_path) + if not os.path.exists(new_path): + warnings.warn(f"{key.capitalize()} file not found: {config_item[key]} \n" \ + "fall back to default.") + else: + config_item[key] = new_path + + return config_item[key] + + + + + + \ No newline at end of file From 7c200d4506b7c21711346cd0de1ab55dc3f76aec Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 13 Dec 2023 17:40:32 +0100 Subject: [PATCH 17/42] config.yml updating and reloaading default works --- scraibe/app/config.yml | 6 ++--- scraibe/app/utils.py | 51 ++++++++++++++++-------------------------- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index 4eda2e4..ad6ce98 100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -8,9 +8,9 @@ launch: max-threads: 40 quiet: false auth: - enabled: false - username: admin - password: admin + auth_enabled: false + auth_username: admin + auth_password: admin auth_message: null show_error : false favicon_path : null diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index cff8b9e..17950c5 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -10,7 +10,7 @@ class ConfigLoader: self.config = config - def restore_defaults_for_keys(self, keys): + def restore_defaults_for_keys(self, *args): """ Restores specified keys to their default values, including nested keys. 
@@ -20,8 +20,8 @@ class ConfigLoader: """ default_config = self.get_default_config() - - self.apply_overrides(self.config, default_config, keys) + for key in args: + self.apply_overrides(self.config, default_config, key) @@ -52,42 +52,29 @@ class ConfigLoader: return cls(config) @staticmethod - def apply_overrides(orig_dict, override_dict, specific_keys=None): + def apply_overrides(orig_dict, override_dict, specific=None): """ Recursively apply overrides to the configuration, only for specific keys. """ - if specific_keys is None: - specific_keys = override_dict.keys() # If no specific keys provided, apply to all keys - - for key, value in override_dict.items(): - if key not in specific_keys: - - continue # Skip keys not in the specific keys set - if isinstance(value, dict): # If the value is a dict, apply recursively sub_dict = orig_dict.get(key, {}) - ConfigLoader.apply_overrides(sub_dict, value, specific_keys) + ConfigLoader.apply_overrides(sub_dict, value, specific) orig_dict[key] = sub_dict else: # Apply override for this key - print("HI iam here", key, value) - orig_dict[key] = value - print("HI", orig_dict) - - # @staticmethod - # def apply_overrides(orig_dict, override_dict): - - # """ Recursively apply overrides to the configuration. 
""" - # for key, value in override_dict.items(): - # if isinstance(value, dict): - # # If the value is a dict, apply recursively - # ConfigLoader.apply_overrides(orig_dict.get(key, {}), value) - # else: - # # If the value is not a dict, search for the key and update - # if ConfigLoader.update_nested_key(orig_dict, key, value): - # continue # Key was found and updated - # orig_dict[key] = value # Key not found, update at this level + if specific is None: + # If no specific keys are provided, update the key + # If the value is not a dict, search for the key and update + if ConfigLoader.update_nested_key(orig_dict, key, value): + continue # Key was found and updated + orig_dict[key] = value # Key not found, update at this level + + elif key in specific: + # If specific keys are provided, only update if the key is in the list + if ConfigLoader.update_nested_key(orig_dict, specific, value): + continue # Key was found and updated + orig_dict[specific] = value @staticmethod def update_nested_key(d, key, value): @@ -144,8 +131,8 @@ class AppConfig(ConfigLoader): launch_options = self.config.get("launch") - if launch_options.get('auth').pop('enabled'): - self.config['launch']['auth'] = (launch_options.get('auth').pop('username'), + if launch_options.get('auth').pop('auth_enabled'): + self.config['launch']['auth'] = (launch_options.get('auth').pop('auth_username'), launch_options.get('auth').pop('password')) else: self.config['launch']['auth'] = None From 6217e3a9b3d74dd0b2bdbf061b2cb4dcca442c40 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 24 Jan 2024 15:58:37 +0100 Subject: [PATCH 18/42] adapt everything to work with new config file --- scraibe/app/config.yml | 21 +++++++++-------- scraibe/app/interface.py | 18 +++++++------- scraibe/app/multi.py | 14 +++++++++-- scraibe/app/utils.py | 51 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 81 insertions(+), 23 deletions(-) diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index ad6ce98..9f6a826 
100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -1,10 +1,13 @@ launch: # The following are the default values for the launch configuration # for more informations look at https://www.gradio.app/docs/interface - server_port: 8080 + server_port: 7860 server_name: 0.0.0.0 - inbrowser: true inline: false + inbrowser: true + share : false + debug : false + max-threads: 40 quiet: false auth: @@ -12,7 +15,9 @@ launch: auth_username: admin auth_password: admin auth_message: null + prevent_thread_lock : false show_error : false + show_tips : true favicon_path : null ssl_keyfile : null ssl_certfile : null @@ -22,21 +27,17 @@ launch: show_api : false allowed_paths : null blocked_paths : null - root_path : null + root_path : '' app_kwargs : null - state_session_capacity : 1000 - share_server_address : null - share_server_protocol : null - share : false - debug : false + queue: # The following are the default values for the queue configuration # for more informations look at hhttps://www.gradio.app/docs/interface + concurrency_count : 1 status_update_rate : 'auto' api_open : null max_size : null - concurrency_count : null - default_concurrency_limit : 'not_set' + layout: header: scraibe/app/header.html footer: null diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py index ddf10ee..fce582c 100644 --- a/scraibe/app/interface.py +++ b/scraibe/app/interface.py @@ -31,21 +31,18 @@ LANGUAGES = [ "Vietnamese", "Welsh" ] -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) -def gradio_Interface(): + +def gradio_Interface(layout = None,): with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: # Define components - hname = os.path.join(CURRENT_PATH, "header.html") - header = open(hname, "r").read() - # ugly hack to get the logo to work - header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" ) - - gr.HTML(header, visible= True, show_label=False) + + if layout.get('header') is not None: + 
gr.HTML(layout.get('header'), visible= True, show_label=False) with gr.Row(): @@ -98,7 +95,10 @@ def gradio_Interface(): visible= False, interactive= True) annotate = gr.Button(value="Annotate", visible= False, interactive= True) - + + if layout.get('footer') is not None: + gr.HTML(layout.get('footer'), visible= True, show_label=False) + # Define usage of components input.change(fn=select_origin, inputs=[input], outputs=[audio1, audio2, video1, video2, file_in]) diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py index ce61f70..ec9f17e 100644 --- a/scraibe/app/multi.py +++ b/scraibe/app/multi.py @@ -68,13 +68,23 @@ def model_worker(model_params : Union[Scraibe, dict], clear_queue(response_queue) loaded_event.clear() -def start_model_worker(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args, **kwargs): +def start_model_worker(model_params, + request_queue, + last_active_time, + response_queue, + loaded_event, + running_event, + *args, **kwargs): context = multiprocessing.get_context('spawn') model_process = context.Process(target=model_worker, args=(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args), kwargs=kwargs) model_process.start() return model_process -def timer_thread(request_queue, last_active_time,loaded_event, running_event, timeout=30): +def timer_thread(request_queue, + last_active_time, + loaded_event, + running_event, + timeout=30): while True: time.sleep(timeout) diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index 17950c5..35d2f8e 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -1,9 +1,12 @@ +from email import header +from math import e import os import warnings import yaml import scraibe.app.global_var as gv +CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) class ConfigLoader: def __init__(self, config): @@ -42,6 +45,7 @@ class ConfigLoader: config = cls.get_default_config() # Override with another YAML file if 
provided + if yaml_path: with open(yaml_path, 'r') as file: override_config = yaml.safe_load(file) @@ -106,7 +110,7 @@ class AppConfig(ConfigLoader): self.set_launch_options() self.set_layout_options() - self.lauch = self.config.get("launch") + self.launch = self.config.get("launch") self.model = self.config.get("model") self.advanced = self.config.get("advanced") self.queue = self.config.get("queue") @@ -141,7 +145,50 @@ class AppConfig(ConfigLoader): self.config['layout']['header'] = self.check_and_set_path(self.config['layout'], 'header') self.config['layout']['footer'] = self.check_and_set_path(self.config['layout'], 'footer') self.config['layout']['logo'] = self.check_and_set_path(self.config['layout'], 'logo') - + + def get_layout(self): + + if not os.path.exists(self.config['layout']['header']) and \ + self.config['layout']['header'] == "scraibe/app/header.html": + + hname = os.path.join(CURRENT_PATH, "header.html") + + header = open(hname).read() + + elif not os.path.exists(self.config['layout']['header']) and self.config['layout']['header'] != "scraibe/app/header.html": + warnings.warn(f"Header file not found: {self.config['layout']['header']} \n" \ + "fall back to default.") + + hname = os.path.join(CURRENT_PATH, "header.html") + + header = open(hname).read() + elif os.path.exists(self.config['layout']['header']): + header = open(self.config['layout']['header']).read() + else: + warnings.warn(f"Header file not found: {self.config['layout']['header']}") + header = None + + + if header != None: + if self.config['layout']['logo'] == "scraibe/app/logo.svg": + header = header.replace("/file=logo.svg", f"/file={os.path.join(CURRENT_PATH, 'logo.svg')}") + elif self.config['layout']['logo'] != "scraibe/app/logo.svg": + header = header.replace("/file=logo.svg", f"/file={self.config['layout']['logo']}") + else: + warnings.warn(f"Logo file not found: {self.config['layout']['logo']}") + + + if self.config['layout']['footer'] != None: + if 
os.path.exists(self.config['layout']['footer']): + footer = open(self.config['layout']['footer']).read() + elif self.config['layout']['footer'] == None: + footer = None + else: + warnings.warn(f"Footer file not found: {self.config['layout']['footer']}") + else: + footer = None + return {'header' : header , + 'footer' : footer} @staticmethod def check_and_set_path(config_item, key): From ea7117545d0a481bcd2aa0de11cf7bff6c303e77 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 24 Jan 2024 16:18:42 +0100 Subject: [PATCH 19/42] fixed wrong dict entry --- scraibe/app/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index 35d2f8e..c9693ef 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -137,7 +137,7 @@ class AppConfig(ConfigLoader): if launch_options.get('auth').pop('auth_enabled'): self.config['launch']['auth'] = (launch_options.get('auth').pop('auth_username'), - launch_options.get('auth').pop('password')) + launch_options.get('auth').pop('auth_password')) else: self.config['launch']['auth'] = None From 5c16b625270b84b6a987d579a4ef825b30d9e39e Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Wed, 24 Jan 2024 16:20:13 +0100 Subject: [PATCH 20/42] fixed typo --- scraibe/app/config.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index 9f6a826..8e908e6 100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -7,8 +7,7 @@ launch: inbrowser: true share : false debug : false - - max-threads: 40 + max_threads: 40 quiet: false auth: auth_enabled: false From c65dc51541d554db8ac25d0ab07358b99da3da5f Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Thu, 25 Jan 2024 09:47:54 +0100 Subject: [PATCH 21/42] typos fixed --- scraibe/app/config.yml | 4 ++-- scraibe/app/utils.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index 8e908e6..9e42db3 
100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -4,7 +4,7 @@ launch: server_port: 7860 server_name: 0.0.0.0 inline: false - inbrowser: true + inbrowser: false share : false debug : false max_threads: 40 @@ -16,7 +16,7 @@ launch: auth_message: null prevent_thread_lock : false show_error : false - show_tips : true + show_tips : false favicon_path : null ssl_keyfile : null ssl_certfile : null diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index c9693ef..816bc4c 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -1,5 +1,3 @@ -from email import header -from math import e import os import warnings import yaml From ef7bd6e15c603d2f6b25a821c94319e1eb377b27 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Thu, 25 Jan 2024 16:08:53 +0100 Subject: [PATCH 22/42] made cli work with new interface --- scraibe/app/app_starter.py | 28 ++++++++++ scraibe/cli.py | 106 ++++++++++++++++++++----------------- scraibe/misc.py | 15 ++++++ 3 files changed, 100 insertions(+), 49 deletions(-) create mode 100644 scraibe/app/app_starter.py diff --git a/scraibe/app/app_starter.py b/scraibe/app/app_starter.py new file mode 100644 index 0000000..9ed1d0b --- /dev/null +++ b/scraibe/app/app_starter.py @@ -0,0 +1,28 @@ +""" +This script is used to start the Gradio interface for audio transcription. +A configuration file can be passed to the script to configure the interface. +If no configuration file is passed, the default configuration is used. +The main Reason for this script is to allow the use of multiprocessing in the app. 
+""" + +import multiprocessing +from scraibe.misc import ParseKwargs +from argparse import ArgumentParser + +parser = ArgumentParser() + +parser.add_argument("--server-config", type=str, default= None, + help="Path to the configy.yml file.") + +parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={}, + help='Keyword arguments for the Gradio app.') + +args = parser.parse_args() + +if __name__ == '__main__': + + multiprocessing.set_start_method('spawn') + + from scraibe.app.app import app + + app(config = args.server_config, **args.server_kwargs) \ No newline at end of file diff --git a/scraibe/cli.py b/scraibe/cli.py index c023f38..f4b49f7 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -5,10 +5,11 @@ The function includes arguments for specifying the audio files, model paths, output formats, and other options necessary for transcription. """ import os -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, Action import json from .autotranscript import Scraibe +from .misc import ParseKwargs from .app.app import gradio_Interface from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE @@ -41,13 +42,15 @@ def cli(): help="List of audio files to transcribe.") group.add_argument('--start-server', action='store_true', - help='Start the Gradio app.') + help='Start the Gradio app.' \ + 'If set, all other arguments are ignored' \ + 'besides --server-config or --server-kwargs.') - parser.add_argument("--port", type=int, default= None, - help="Port to run the Gradio app on. Defaults to 7860.") + parser.add_argument("--server-config", type=str, default= None, + help="Path to the configy.yml file.") - parser.add_argument("--server-name", type=str, default= None, - help="Name of the Gradio app. 
If empty 127.0.0.1 or 0.0.0.0 will be used.") + parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={}, + help='Keyword arguments for the Gradio app.') parser.add_argument("--whisper-model-name", default="medium", help="Name of the Whisper model to use.") @@ -66,7 +69,8 @@ def cli(): help="Device to use for PyTorch inference.") parser.add_argument("--num-threads", type=int, default=0, - help="Number of threads used by torch for CPU inference; overrides MKL_NUM_THREADS/OMP_NUM_THREADS.") + help="Number of threads used by torch for CPU inference; '\ + 'overrides MKL_NUM_THREADS/OMP_NUM_THREADS.") parser.add_argument("--output-directory", "-o", type=str, default=".", help="Directory to save the transcription outputs.") @@ -113,55 +117,59 @@ def cli(): if arg_dict["whisper_model_directory"]: class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory") - model = Scraibe(**class_kwargs) - - - if arg_dict["audio_files"]: - audio_files = arg_dict.pop("audio_files") + if not start_server: - if task == "autotranscribe" or task == "autotranscribe+translate": - for audio in audio_files: - if task == "autotranscribe+translate": - task = "translate" - else: - task = "transcribe" - - out = model.autotranscribe(audio,task = task, language=arg_dict.pop("language"), verbose = arg_dict.pop("verbose_output")) - basename = audio.split("/")[-1].split(".")[0] - print(f'Saving {basename}.{out_format} to {out_folder}') - out.save(os.path.join(out_folder, f"{basename}.{out_format}")) - - elif task == "diarization": - for audio in audio_files: - if arg_dict.pop("verbose_output"): - print(f"Verbose not implemented for diarization.") - - out = model.diarization(audio) - basename = audio.split("/")[-1].split(".")[0] - path = os.path.join(out_folder, f"{basename}.{out_format}") - - print(f'Saving {basename}.{out_format} to {out_folder}') - - with open(path, "w") as f: - json.dump(json.dumps(out, indent= 1), f) + model = Scraibe(**class_kwargs) - elif task == 
"transcribe" or task == "translate": + if arg_dict["audio_files"]: + audio_files = arg_dict.pop("audio_files") - for audio in audio_files: - - out = model.transcribe(audio, task = task, - language= arg_dict.pop("language"), - verbose = arg_dict.pop("verbose_output")) - basename = audio.split("/")[-1].split(".")[0] - path = os.path.join(out_folder, f"{basename}.{out_format}") - with open(path, "w") as f: - f.write(out) + if task == "autotranscribe" or task == "autotranscribe+translate": + for audio in audio_files: + if task == "autotranscribe+translate": + task = "translate" + else: + task = "transcribe" + + out = model.autotranscribe(audio,task = task, language=arg_dict.pop("language"), verbose = arg_dict.pop("verbose_output")) + basename = audio.split("/")[-1].split(".")[0] + print(f'Saving {basename}.{out_format} to {out_folder}') + out.save(os.path.join(out_folder, f"{basename}.{out_format}")) + + elif task == "diarization": + for audio in audio_files: + if arg_dict.pop("verbose_output"): + print(f"Verbose not implemented for diarization.") + + out = model.diarization(audio) + basename = audio.split("/")[-1].split(".")[0] + path = os.path.join(out_folder, f"{basename}.{out_format}") + + print(f'Saving {basename}.{out_format} to {out_folder}') + + with open(path, "w") as f: + json.dump(json.dumps(out, indent= 1), f) + + elif task == "transcribe" or task == "translate": + for audio in audio_files: - if start_server: # unfinished code + out = model.transcribe(audio, task = task, + language= arg_dict.pop("language"), + verbose = arg_dict.pop("verbose_output")) + basename = audio.split("/")[-1].split(".")[0] + path = os.path.join(out_folder, f"{basename}.{out_format}") + with open(path, "w") as f: + f.write(out) + + + else: # unfinished code + import subprocess + import sys - gradio_Interface(model).queue().launch(server_port=args.port, server_name=args.server_name) + execute_path = os.path.join(os.path.dirname(__file__), "app/app_starter.py") + 
subprocess.run([sys.executable, execute_path]) if __name__ == "__main__": cli() \ No newline at end of file diff --git a/scraibe/misc.py b/scraibe/misc.py index b1afeea..ae9136e 100644 --- a/scraibe/misc.py +++ b/scraibe/misc.py @@ -1,6 +1,7 @@ import os import yaml from pyannote.audio.core.model import CACHE_DIR as PYANNOTE_CACHE_DIR +from argparse import Action CACHE_DIR = os.getenv( "AUTOT_CACHE", @@ -38,3 +39,17 @@ def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> with open(file_path, "w") as stream: yaml.dump(yml, stream) + +class ParseKwargs(Action): + """ + Custom argparse action to parse keyword arguments. + """ + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, dict()) + for value in values: + key, value = value.split('=') + try: + value = eval(value) + except: + pass + getattr(namespace, self.dest)[key] = value \ No newline at end of file From ea68b5de5f447c1a9bde22ea1d51bf6b27bd70d6 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 10:31:50 +0100 Subject: [PATCH 23/42] add .yml to package data --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 64d30b9..1e2c641 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -from calendar import c import pkg_resources import os from setuptools import setup, find_packages @@ -21,6 +20,8 @@ with open(verfile, "r") as fp: build_version = "SCRAIBE_BUILD" in os.environ +version["ISRELEASED"] = True if "ISRELEASED" in os.environ else False + if __name__ == "__main__": setup( @@ -53,7 +54,7 @@ if __name__ == "__main__": keywords = ['transcription', 'speech recognition', 'whisper', 'pyannote', 'audio', 'ScrAIbe', 'scraibe', 'speech-to-text', 'speech-to-text transcription', 'speech-to-text recognition', 'voice-to-speech'], - package_data={'scraibe.app' : ["*.html", "*.svg"]}, + package_data={'scraibe.app' : ["*.html", "*.svg","*.yml"]}, entry_points={'console_scripts': 
['scraibe = scraibe.cli:cli']} From 250c95535e0b1e1e248248d9f5f729dee0b174ef Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 13:31:02 +0100 Subject: [PATCH 24/42] fixed unused import --- scraibe/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scraibe/cli.py b/scraibe/cli.py index f4b49f7..618f6d8 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -5,7 +5,7 @@ The function includes arguments for specifying the audio files, model paths, output formats, and other options necessary for transcription. """ import os -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, Action +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter import json from .autotranscript import Scraibe From 74eba1c641c7bf397078914ee3c3ae97b9242b22 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 13:31:32 +0100 Subject: [PATCH 25/42] added slightly more robust erroir haneling using local models --- scraibe/diarisation.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/scraibe/diarisation.py b/scraibe/diarisation.py index f90bcdb..3e6047c 100644 --- a/scraibe/diarisation.py +++ b/scraibe/diarisation.py @@ -27,7 +27,9 @@ Usage: diarisation_output = model.diarization("path/to/audiofile.wav") """ +import warnings import os +import yaml from pathlib import Path from typing import TypeVar, Union @@ -213,7 +215,39 @@ class Diariser: model = 'pyannote/speaker-diarization' elif not os.path.exists(model) and use_auth_token is not None: model = 'pyannote/speaker-diarization' - + elif os.path.exists(model) and not use_auth_token: + # check if model can be found locally nearby the config file + with open(model, 'r') as file: + config = yaml.safe_load(file) + + path_to_model = config['pipeline']['params']['segmentation'] + + if not os.path.exists(path_to_model): + warnings.warn(f"Model not found at {path_to_model}. 
"\ + "Trying to find it nearby the config file.") + + pwd = file.split("/")[:-1] + path_to_model = os.path.join(pwd, "pytorch_model.bin") + + if not os.path.exists(path_to_model): + warnings.warn(f"Model not found at {path_to_model}. \ + 'Trying to find it nearby .bin files instead.") + # list elementes with the ending .bin + bin_files = [f for f in os.listdir(pwd) if f.endswith(".bin")] + if len(bin_files) == 1: + path_to_model = os.path.join(pwd, bin_files[0]) + else: + warnings.warn("Found more than one .bin file. "\ + "or none. Please specify the path to the model " \ + "or setup a huggingface token.") + + warnings.warn(f"Found model at {path_to_model} overwriting config file.") + + config['pipeline']['params']['segmentation'] = path_to_model + + with open(model, 'w') as file: + yaml.dump(config, file) + _model = Pipeline.from_pretrained(model, use_auth_token = use_auth_token, cache_dir = cache_dir, From 25217533cbe526f979a15074ff3c2f97ef37dbd9 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 13:37:28 +0100 Subject: [PATCH 26/42] removed qtfaststart beacaue it is not used --- scraibe/app/__init__.py | 1 - scraibe/app/qtfaststart.py | 319 ------------------------------------- 2 files changed, 320 deletions(-) delete mode 100644 scraibe/app/qtfaststart.py diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py index a38ad86..bdf5464 100644 --- a/scraibe/app/__init__.py +++ b/scraibe/app/__init__.py @@ -1,4 +1,3 @@ -from .qtfaststart import * from .multi import * from .interface import * from .stg import * diff --git a/scraibe/app/qtfaststart.py b/scraibe/app/qtfaststart.py deleted file mode 100644 index e57eb20..0000000 --- a/scraibe/app/qtfaststart.py +++ /dev/null @@ -1,319 +0,0 @@ -""" -This file contains a modified version of qtfaststart by qtfaststart -https://github.com/danielgtaylor/qtfaststart/tree/master - -All credit goes to the original author. -Copyright (C) 2008 - 2013 Daniel G. 
Taylor -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the -Software, and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies -or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. -""" - -import logging -import os -import struct -import collections -import io - -# define error classes -class FastStartException(Exception): - """ - Raised when something bad happens during processing. - """ - pass - -class FastStartSetupError(FastStartException): - """ - Rasised when asked to process a file that does not need processing - """ - pass - -class MalformedFileError(FastStartException): - """ - Raised when the input file is setup in an unexpected way - """ - pass - -class UnsupportedFormatError(FastStartException): - """ - Raised when a movie file is recognized as a format not supported. 
- """ - pass - -# define constants -CHUNK_SIZE = 8192 - -log = logging.getLogger("qtfaststart") - -# Older versions of Python require this to be defined -if not hasattr(os, 'SEEK_CUR'): - os.SEEK_CUR = 1 - -Atom = collections.namedtuple('Atom', 'name position size') - -def read_atom(datastream): - """ - Read an atom and return a tuple of (size, type) where size is the size - in bytes (including the 8 bytes already read) and type is a "fourcc" - like "ftyp" or "moov". - """ - size, type = struct.unpack(">L4s", datastream.read(8)) - type = type.decode('ascii') - return size, type - - -def _read_atom_ex(datastream): - """ - Read an Atom from datastream - """ - pos = datastream.tell() - atom_size, atom_type = read_atom(datastream) - if atom_size == 1: - atom_size, = struct.unpack(">Q", datastream.read(8)) - return Atom(atom_type, pos, atom_size) - - -def get_index(datastream): - """ - Return an index of top level atoms, their absolute byte-position in the - file and their size in a list: - - index = [ - ("ftyp", 0, 24), - ("moov", 25, 2658), - ("free", 2683, 8), - ... - ] - - The tuple elements will be in the order that they appear in the file. - """ - log.debug("Getting index of top level atoms...") - - index = list(_read_atoms(datastream)) - _ensure_valid_index(index) - - return index - - -def _read_atoms(datastream): - """ - Read atoms until an error occurs - """ - while datastream: - try: - atom = _read_atom_ex(datastream) - log.debug("%s: %s" % (atom.name, atom.size)) - except: - break - - yield atom - - if atom.size == 0: - if atom.name == "mdat": - # Some files may end in mdat with no size set, which generally - # means to seek to the end of the file. We can just stop indexing - # as no more entries will be found! - break - else: - # Weird, but just continue to try to find more atoms - continue - - datastream.seek(atom.position + atom.size) - - -def _ensure_valid_index(index): - """ - Ensure the minimum viable atoms are present in the index. 
- - Raise FastStartException if not. - """ - top_level_atoms = set([item.name for item in index]) - for key in ["moov", "mdat"]: - if key not in top_level_atoms: - log.error("%s atom not found, is this a valid MOV/MP4 file?" % key) - raise FastStartException() - - -def find_atoms(size, datastream): - """ - Compatibilty interface for _find_atoms_ex - """ - fake_parent = Atom('fake', datastream.tell()-8, size+8) - for atom in _find_atoms_ex(fake_parent, datastream): - yield atom.name - - -def _find_atoms_ex(parent_atom, datastream): - """ - Yield either "stco" or "co64" Atoms from datastream. - datastream will be 8 bytes into the stco or co64 atom when the value - is yielded. - - It is assumed that datastream will be at the end of the atom after - the value has been yielded and processed. - - parent_atom is the parent atom, a 'moov' or other ancestor of CO - atoms in the datastream. - """ - stop = parent_atom.position + parent_atom.size - - while datastream.tell() < stop: - try: - atom = _read_atom_ex(datastream) - except: - log.exception("Error reading next atom!") - raise FastStartException() - - if atom.name in ["trak", "mdia", "minf", "stbl"]: - # Known ancestor atom of stco or co64, search within it! - for res in _find_atoms_ex(atom, datastream): - yield res - elif atom.name in ["stco", "co64"]: - yield atom - else: - # Ignore this atom, seek to the end of it. - datastream.seek(atom.position + atom.size) - - -def process(infilename, limit=float('inf')): - """ - Convert a Quicktime/MP4 file for streaming by moving the metadata to - the front of the file. This method writes a new file. - - If limit is set to something other than zero it will be used as the - number of bytes to write of the atoms following the moov atom. This - is very useful to create a small sample of a file with full headers, - which can then be used in bug reports and such. 
- """ - if isinstance(infilename, str): - datastream = open(infilename, "rb") - elif isinstance(infilename, bytes): - datastream = io.BytesIO(infilename) - else: - raise TypeError("infilename must be a filename, bytes or file-like object") - # Get the top level atom index - index = get_index(datastream) - - mdat_pos = 999999 - free_size = 0 - - # Make sure moov occurs AFTER mdat, otherwise no need to run! - for atom in index: - # The atoms are guaranteed to exist from get_index above! - if atom.name == "moov": - moov_atom = atom - moov_pos = atom.position - elif atom.name == "mdat": - mdat_pos = atom.position - elif atom.name == "free" and atom.position < mdat_pos: - # This free atom is before the mdat! - free_size += atom.size - log.info("Removing free atom at %d (%d bytes)" % (atom.position, atom.size)) - elif atom.name == "\x00\x00\x00\x00" and atom.position < mdat_pos: - # This is some strange zero atom with incorrect size - free_size += 8 - log.info("Removing strange zero atom at %s (8 bytes)" % atom.position) - - # Offset to shift positions - offset = moov_atom.size - free_size - - if moov_pos < mdat_pos: - # moov appears to be in the proper place, don't shift by moov size - offset -= moov_atom.size - if not free_size: - # No free atoms and moov is correct, we are done! - log.error("This file appears to already be setup for streaming!") - # Stupid hack to retrun the non-processed file: - if isinstance(infilename, str): - return open(infilename, "rb").read() - elif isinstance(infilename, bytes): - return io.BytesIO(infilename).read() - - # Read and fix moov - moov = _patch_moov(datastream, moov_atom, offset) - - log.info("Writing output...") - outfile = b'' - - # Write ftype - for atom in index: - if atom.name == "ftyp": - log.debug("Writing ftyp... (%d bytes)" % atom.size) - datastream.seek(atom.position) - outfile += datastream.read(atom.size) - - # Write moov - _bytes = moov.getvalue() - log.debug("Writing moov... 
(%d bytes)" % len(_bytes)) - outfile += _bytes - - # Write the rest - atoms = [item for item in index if item.name not in ["ftyp", "moov", "free"]] - for atom in atoms: - log.debug("Writing %s... (%d bytes)" % (atom.name, atom.size)) - datastream.seek(atom.position) - - # for compatability, allow '0' to mean no limit - cur_limit = limit or float('inf') - cur_limit = min(cur_limit, atom.size) - - for chunk in get_chunks(datastream, CHUNK_SIZE, cur_limit): - outfile += chunk - - return outfile - - -def _patch_moov(datastream, atom, offset): - datastream.seek(atom.position) - moov = io.BytesIO(datastream.read(atom.size)) - - # reload the atom from the fixed stream - atom = _read_atom_ex(moov) - - for atom in _find_atoms_ex(atom, moov): - # Read either 32-bit or 64-bit offsets - ctype, csize = dict( - stco=('L', 4), - co64=('Q', 8), - )[atom.name] - - # Get number of entries - version, entry_count = struct.unpack(">2L", moov.read(8)) - - log.info("Patching %s with %d entries" % (atom.name, entry_count)) - - entries_pos = moov.tell() - - struct_fmt = ">%(entry_count)s%(ctype)s" % vars() - - # Read entries - entries = struct.unpack(struct_fmt, moov.read(csize * entry_count)) - - # Patch and write entries - offset_entries = [entry + offset for entry in entries] - moov.seek(entries_pos) - moov.write(struct.pack(struct_fmt, *offset_entries)) - return moov - -def get_chunks(stream, chunk_size, limit): - remaining = limit - while remaining: - chunk = stream.read(min(remaining, chunk_size)) - if not chunk: - return - remaining -= len(chunk) - yield chunk From c33c0f1f5369038bc47eab5c5cb13995fb9d4592 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:09:11 +0100 Subject: [PATCH 27/42] add app file and add gitignore --- .dockerignore | 6 ++++ .gitignore | 6 ++++ scraibe/app/app.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 
scraibe/app/app.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1155cba --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +scraibe/*__pycache__ +scraibe/app/*__pycache__ +scraibe/.pyannotetoken +.git +.gitignore +.github diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..18c7986 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +transcibe.py +scraibe/*__pycache__ +scraibe/app/*__pycache__ +scraibe/.pyannotetoken + + diff --git a/scraibe/app/app.py b/scraibe/app/app.py new file mode 100644 index 0000000..798e2be --- /dev/null +++ b/scraibe/app/app.py @@ -0,0 +1,72 @@ +""" +Gradio App. +-------------------------------- + +This module provides an interface to transcribe audio files using the +Scraibe model. Users can either upload an audio file or record their speech +live for transcription. The application supports multiple languages and provides +options to specify the number of speakers and the language of the audio. + +Attributes: + LANGUAGES (list): A list of supported languages for transcription. + +Usage: + Run this script to start the Gradio web interface for audio transcription. + +""" + + +#### +# Gradio Interface +#### + +from threading import Thread + +import scraibe.app.global_var as gv +from .interface import gradio_Interface +from .multi import * +from .utils import * + + +def app(config : str = None, **kwargs): + """ + Launches the Gradio interface for audio transcription. + + Args: + interface_params (dict): A dictionary of parameters for the Gradio interface. + queue_params (dict): A dictionary of parameters for the queue. + launch_params (dict): A dictionary of parameters for launching the interface. 
+ + Returns: + None + + """ + + # Load the configuration + + config = AppConfig.load_config(config, **kwargs) + + + gv.MODEL_PROCESS = start_model_worker(gv.MODEL_PARAMS, + gv.REQUEST_QUEUE, + gv.LAST_ACTIVE_TIME, + gv.RESPONSE_QUEUE, + gv.LOADED_EVENT, + gv.RUNNING_EVENT) + + timer = Thread(target=timer_thread, args=(gv.REQUEST_QUEUE, + gv.LAST_ACTIVE_TIME, + gv.LOADED_EVENT, + gv.RUNNING_EVENT), daemon=True) + layout = config.get_layout() + + timer.start() + + print("Starting Gradio Web Interface") + + gradio_Interface(layout).queue(**config.queue).launch(**config.launch) + + timer.join() + gv.MODEL_PROCESS.join() + + print('') \ No newline at end of file From 8ba3ee146b566b138215ed7e05b7e60b6c97e922 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:28:50 +0100 Subject: [PATCH 28/42] Redifine ParseKwargs to not import scraibe to early on --- scraibe/app/app_starter.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/scraibe/app/app_starter.py b/scraibe/app/app_starter.py index 9ed1d0b..e8542c2 100644 --- a/scraibe/app/app_starter.py +++ b/scraibe/app/app_starter.py @@ -6,8 +6,21 @@ The main Reason for this script is to allow the use of multiprocessing in the ap """ import multiprocessing -from scraibe.misc import ParseKwargs -from argparse import ArgumentParser +from argparse import ArgumentParser, Action + +class ParseKwargs(Action): + """ + Custom argparse action to parse keyword arguments. has to bne redifined here because of multiprocessing. 
+ """ + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, dict()) + for value in values: + key, value = value.split('=') + try: + value = eval(value) + except: + pass + getattr(namespace, self.dest)[key] = value parser = ArgumentParser() From b0858e464775941e2624ba3c4953a7f32d941bbf Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:33:16 +0100 Subject: [PATCH 29/42] parse args to app_starter.py --- scraibe/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scraibe/cli.py b/scraibe/cli.py index 618f6d8..2654b9c 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -169,7 +169,10 @@ def cli(): execute_path = os.path.join(os.path.dirname(__file__), "app/app_starter.py") - subprocess.run([sys.executable, execute_path]) + config = arg_dict.pop("server_config") + server_kwargs = arg_dict.pop("server_kwargs") + + subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"]) if __name__ == "__main__": cli() \ No newline at end of file From c1ed0547b87c9e8f3a9edc88d56b911b23576d0b Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:38:53 +0100 Subject: [PATCH 30/42] handle exeptions when dict is empty --- scraibe/cli.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scraibe/cli.py b/scraibe/cli.py index 2654b9c..1c7f320 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -172,7 +172,14 @@ def cli(): config = arg_dict.pop("server_config") server_kwargs = arg_dict.pop("server_kwargs") - subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"]) + if not config: + subprocess.run([sys.executable, execute_path, f"--server-kwargs={server_kwargs}"]) + elif not server_kwargs: + subprocess.run([sys.executable, execute_path, f"--server-config={config}"]) + elif not config and not server_kwargs: + subprocess.run([sys.executable, execute_path]) + else: 
+ subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"]) if __name__ == "__main__": cli() \ No newline at end of file From ff47d058c826e99b3bca37de106906bfd4eec11b Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:46:51 +0100 Subject: [PATCH 31/42] fixed typo --- scraibe/diarisation.py | 50 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/scraibe/diarisation.py b/scraibe/diarisation.py index 3e6047c..c62bda0 100644 --- a/scraibe/diarisation.py +++ b/scraibe/diarisation.py @@ -219,34 +219,34 @@ class Diariser: # check if model can be found locally nearby the config file with open(model, 'r') as file: config = yaml.safe_load(file) + + path_to_model = config['pipeline']['params']['segmentation'] + + if not os.path.exists(path_to_model): + warnings.warn(f"Model not found at {path_to_model}. "\ + "Trying to find it nearby the config file.") - path_to_model = config['pipeline']['params']['segmentation'] + pwd = model.split("/")[:-1] + path_to_model = os.path.join(pwd, "pytorch_model.bin") if not os.path.exists(path_to_model): - warnings.warn(f"Model not found at {path_to_model}. "\ - "Trying to find it nearby the config file.") - - pwd = file.split("/")[:-1] - path_to_model = os.path.join(pwd, "pytorch_model.bin") - - if not os.path.exists(path_to_model): - warnings.warn(f"Model not found at {path_to_model}. \ - 'Trying to find it nearby .bin files instead.") - # list elementes with the ending .bin - bin_files = [f for f in os.listdir(pwd) if f.endswith(".bin")] - if len(bin_files) == 1: - path_to_model = os.path.join(pwd, bin_files[0]) - else: - warnings.warn("Found more than one .bin file. "\ - "or none. 
Please specify the path to the model " \ - "or setup a huggingface token.") - - warnings.warn(f"Found model at {path_to_model} overwriting config file.") - - config['pipeline']['params']['segmentation'] = path_to_model - - with open(model, 'w') as file: - yaml.dump(config, file) + warnings.warn(f"Model not found at {path_to_model}. \ + 'Trying to find it nearby .bin files instead.") + # list elementes with the ending .bin + bin_files = [f for f in os.listdir(pwd) if f.endswith(".bin")] + if len(bin_files) == 1: + path_to_model = os.path.join(pwd, bin_files[0]) + else: + warnings.warn("Found more than one .bin file. "\ + "or none. Please specify the path to the model " \ + "or setup a huggingface token.") + + warnings.warn(f"Found model at {path_to_model} overwriting config file.") + + config['pipeline']['params']['segmentation'] = path_to_model + + with open(model, 'w') as file: + yaml.dump(config, file) _model = Pipeline.from_pretrained(model, use_auth_token = use_auth_token, From b45a6011193f81cf64859c990faf7440af5e7022 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 15:56:10 +0100 Subject: [PATCH 32/42] fixed path --- scraibe/diarisation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scraibe/diarisation.py b/scraibe/diarisation.py index c62bda0..570ac29 100644 --- a/scraibe/diarisation.py +++ b/scraibe/diarisation.py @@ -227,6 +227,8 @@ class Diariser: "Trying to find it nearby the config file.") pwd = model.split("/")[:-1] + pwd = "/".join(pwd) + path_to_model = os.path.join(pwd, "pytorch_model.bin") if not os.path.exists(path_to_model): From 483204efda38325a61c199ec7b9d5445c5bb5996 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 16:15:58 +0100 Subject: [PATCH 33/42] removed print --- scraibe/app/app.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scraibe/app/app.py b/scraibe/app/app.py index 798e2be..a50dfc2 100644 --- a/scraibe/app/app.py +++ b/scraibe/app/app.py @@ -67,6 +67,4 @@ def 
app(config : str = None, **kwargs): gradio_Interface(layout).queue(**config.queue).launch(**config.launch) timer.join() - gv.MODEL_PROCESS.join() - - print('') \ No newline at end of file + gv.MODEL_PROCESS.join() \ No newline at end of file From 41be3fdee2f894882c200cec0cdd92f7b1e25f9a Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 26 Jan 2024 16:21:47 +0100 Subject: [PATCH 34/42] use TIMEOUT param --- scraibe/app/app.py | 4 +++- scraibe/app/multi.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scraibe/app/app.py b/scraibe/app/app.py index a50dfc2..d1c27d7 100644 --- a/scraibe/app/app.py +++ b/scraibe/app/app.py @@ -57,7 +57,9 @@ def app(config : str = None, **kwargs): timer = Thread(target=timer_thread, args=(gv.REQUEST_QUEUE, gv.LAST_ACTIVE_TIME, gv.LOADED_EVENT, - gv.RUNNING_EVENT), daemon=True) + gv.RUNNING_EVENT, + gv.TIMEOUT), daemon=True) + layout = config.get_layout() timer.start() diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py index ec9f17e..487cade 100644 --- a/scraibe/app/multi.py +++ b/scraibe/app/multi.py @@ -84,7 +84,7 @@ def timer_thread(request_queue, last_active_time, loaded_event, running_event, - timeout=30): + timeout): while True: time.sleep(timeout) From 6975986ed4e3b4c7d03bd14b6d19cb702cccf028 Mon Sep 17 00:00:00 2001 From: "Schmieder, Jacob" Date: Mon, 29 Jan 2024 12:32:23 +0000 Subject: [PATCH 35/42] added docstings and typings --- scraibe/app/app.py | 56 +++++++++--- scraibe/app/app_starter.py | 43 ++++++++-- scraibe/app/config.yml | 1 - scraibe/app/global_var.py | 39 ++++++--- scraibe/app/interactions.py | 2 - scraibe/app/interface.py | 30 ++++++- scraibe/app/multi.py | 103 +++++++++++++++++----- scraibe/app/stg.py | 108 +++++++++++++++++------- scraibe/app/utils.py | 164 +++++++++++++++++++++++++++--------- 9 files changed, 410 insertions(+), 136 deletions(-) diff --git a/scraibe/app/app.py b/scraibe/app/app.py index d1c27d7..76b189a 100644 --- a/scraibe/app/app.py +++ b/scraibe/app/app.py @@ 
-1,21 +1,35 @@ """ -Gradio App. --------------------------------- +Gradio App +---------- This module provides an interface to transcribe audio files using the Scraibe model. Users can either upload an audio file or record their speech live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. +options to specify the number of speakers and the language of the audio. It also +enables efficient management of resources by loading and unloading AI models +based on usage. -Attributes: - LANGUAGES (list): A list of supported languages for transcription. +The configuration is managed via a 'config.yml' file, which allows customization +of various aspects of the application, including the Gradio interface, queue +management, and model parameters. + +Configuration Sections in 'config.yml': +- launch: Settings for launching the interface, such as server port, authentication, SSL configuration. +- queue: Configuration for managing request handling and concurrency. +- layout: Customization options for the interface layout, like headers, footers, and logos. +- model: Specifications for different AI models used in transcription. +- advanced: Advanced settings, including session timeout duration. + +Note: + The .queue function of the Gradio interface is currently experiencing issues + and might not work as expected. Usage: Run this script to start the Gradio web interface for audio transcription. - """ + #### # Gradio Interface #### @@ -31,18 +45,27 @@ from .utils import * def app(config : str = None, **kwargs): """ Launches the Gradio interface for audio transcription. - + + Initializes the Gradio web interface with settings from a YAML configuration file + and/or keyword arguments. The function manages AI models, handling their loading + into RAM and unloading after a session or specified timeout. + + The `kwargs` are used to override or supplement values from the `config.yml` file. 
+ They should follow the structure of `config.yml`, which includes sections like + 'launch', 'queue', 'layout', 'model', and 'advanced'. + Args: - interface_params (dict): A dictionary of parameters for the Gradio interface. - queue_params (dict): A dictionary of parameters for the queue. - launch_params (dict): A dictionary of parameters for launching the interface. - + config (str): Path to the YAML configuration file. Default settings are used + if not provided. + **kwargs: Keyword arguments corresponding to the configuration sections. Each + argument should be a dictionary reflecting the structure of its + respective section in `config.yml`. + Returns: None - """ - - # Load the configuration + + # Load and override configuration from the YAML file with kwargs config = AppConfig.load_config(config, **kwargs) @@ -54,19 +77,24 @@ def app(config : str = None, **kwargs): gv.LOADED_EVENT, gv.RUNNING_EVENT) + # Set the timer thread to manage model loading and unloading timer = Thread(target=timer_thread, args=(gv.REQUEST_QUEUE, gv.LAST_ACTIVE_TIME, gv.LOADED_EVENT, gv.RUNNING_EVENT, gv.TIMEOUT), daemon=True) + # Set the layout for the Gradio interface layout = config.get_layout() + # start the timer thread timer.start() print("Starting Gradio Web Interface") + # Launch the Gradio interface gradio_Interface(layout).queue(**config.queue).launch(**config.launch) + # Wait for the timer thread to finish timer.join() gv.MODEL_PROCESS.join() \ No newline at end of file diff --git a/scraibe/app/app_starter.py b/scraibe/app/app_starter.py index e8542c2..b1597f0 100644 --- a/scraibe/app/app_starter.py +++ b/scraibe/app/app_starter.py @@ -1,18 +1,47 @@ -""" -This script is used to start the Gradio interface for audio transcription. -A configuration file can be passed to the script to configure the interface. -If no configuration file is passed, the default configuration is used. -The main Reason for this script is to allow the use of multiprocessing in the app. 
+"""Starts the Gradio interface for audio transcription with optional configuration. + +This script, app_starter.py, initializes and runs a Gradio interface for audio +transcription tasks. It allows users to provide a configuration file for custom +settings. If no configuration file is specified, default settings are applied. +The script is designed to support multiprocessing for improved performance. + +Attributes: + args (argparse.Namespace): Parsed command line arguments. + +Example: + To run the script with custom server configuration and keyword arguments: + $ python app_starter.py --server-config path/to/config.yml --server-kwargs key1=val1 key2=val2 """ import multiprocessing from argparse import ArgumentParser, Action class ParseKwargs(Action): - """ - Custom argparse action to parse keyword arguments. has to bne redifined here because of multiprocessing. + """Custom action for argparse to parse keyword arguments for Gradio app configuration. + + This action parses a series of keyword arguments and converts them into a + dictionary, which is then used to configure the Gradio application. It + supports dynamic types by attempting to evaluate the argument values. + + Attributes: + dest (str): The name of the attribute to be added to the object returned by parse_args(). """ def __call__(self, parser, namespace, values, option_string=None): + """Parses keyword arguments and updates the namespace with these arguments as a dictionary. + + For each value provided, this method splits the string on the '=' character + to separate keys and values, attempting to evaluate the values for Python + literals. If evaluation fails, the raw string is used as the value. + + Args: + parser (ArgumentParser): The ArgumentParser object that called this method. + namespace (Namespace): An argparse.Namespace object that will be returned by parse_args(). + values (list of str): List of strings, each representing a key-value pair in 'key=value' format. 
+ option_string (Optional[str]): The option string that was used to invoke this action. + + Raises: + ValueError: If any string in values does not contain the '=' character, indicating an invalid format. + """ setattr(namespace, self.dest, dict()) for value in values: key, value = value.split('=') diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml index 9e42db3..8b25af5 100644 --- a/scraibe/app/config.yml +++ b/scraibe/app/config.yml @@ -22,7 +22,6 @@ launch: ssl_certfile : null ssl_keyfile_password : null ssl_verify : false - quiet : false show_api : false allowed_paths : null blocked_paths : null diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py index 5599320..b1c1c80 100644 --- a/scraibe/app/global_var.py +++ b/scraibe/app/global_var.py @@ -1,24 +1,37 @@ """ -Stores global variables for the app. +global_var.py + +This module stores global variables for the app. + +Global variables: + REQUEST_QUEUE (multiprocessing.Queue): A queue to store audio file paths as strings. + RESPONSE_QUEUE (multiprocessing.Queue): A queue to store transcriptions as strings. + LAST_ACTIVE_TIME (multiprocessing.Value): A value to store the time of the last activity. + LOADED_EVENT (multiprocessing.Event): An event to indicate when the model is loaded. + RUNNING_EVENT (multiprocessing.Event): An event to indicate when the model is running. + MODEL_PARAMS (Optional[dict]): A dictionary to store the model parameters. + MODEL_PROCESS (Optional[multiprocessing.Process]): A process to handle the model globally. + LAST_USED (float): A float to track the time of the last user activity. + TIMEOUT (Optional[int]): An integer to store the timeout in seconds. + DEFAULT_APP_CONIFG_PATH (str): A string to store the default path to the app configuration file. 
""" -# Global variable to store the model import multiprocessing import os import time +from typing import Optional -REQUEST_QUEUE = multiprocessing.Queue() # audio file path as string -RESPONSE_QUEUE = multiprocessing.Queue() # transcription as string -LAST_ACTIVE_TIME = multiprocessing.Value('d', time.time()) # time of last activity -LOADED_EVENT = multiprocessing.Event() # model loaded event -RUNNING_EVENT = multiprocessing.Event() # model running event +REQUEST_QUEUE: multiprocessing.Queue = multiprocessing.Queue() # audio file path as string +RESPONSE_QUEUE: multiprocessing.Queue = multiprocessing.Queue() # transcription as string +LAST_ACTIVE_TIME: multiprocessing.Value = multiprocessing.Value('d', time.time()) # time of last activity +LOADED_EVENT: multiprocessing.Event = multiprocessing.Event() # model loaded event +RUNNING_EVENT: multiprocessing.Event = multiprocessing.Event() # model running event -MODEL_PARAMS = None # model parameters -MODEL_PROCESS = None # model process to handle globally +MODEL_PARAMS: Optional[dict] = None # model parameters +MODEL_PROCESS: Optional[multiprocessing.Process] = None # model process to handle globally # Global variable to track user activity -LAST_USED = time.time() -TIMEOUT = None #seconds - -DEFAULT_APP_CONIFG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.yml") +LAST_USED: float = time.time() +TIMEOUT: Optional[int] = None # seconds +DEFAULT_APP_CONIFG_PATH: str = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.yml") \ No newline at end of file diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py index 1719388..9206e75 100644 --- a/scraibe/app/interactions.py +++ b/scraibe/app/interactions.py @@ -3,8 +3,6 @@ This file contains ervery function that will be called when the user interacts w UI like pressing a button or uploading a file. 
""" -from re import M -import time import gradio as gr import scraibe.app.global_var as gv from scraibe import Transcript diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py index fce582c..bab29a9 100644 --- a/scraibe/app/interface.py +++ b/scraibe/app/interface.py @@ -1,11 +1,20 @@ """ -This file contains the actual gradio Interface which is used to interact with the user. +This module contains the gradio Interface which is used to interact with the user. + +The interface is themed with a soft color scheme, with primary colors of green and orange, and a neutral color of gray. + +A list of languages is also defined in this module, which may be used elsewhere in the application. + +Classes: + Soft: A class from the gradio library used to theme the interface. + +Variables: + theme (gr.themes.Soft): The theme for the gradio interface. + LANGUAGES (list of str): A list of languages supported by the application. """ import gradio as gr -import os -import scraibe.app.global_var as gv from .interactions import * from .stg import * @@ -35,7 +44,20 @@ LANGUAGES = [ def gradio_Interface(layout = None,): - + """ + Creates a gradio interface for audio transcription. + + The interface includes options for the user to select the task, number of speakers, translation, language, and input type. + It also provides options for the user to upload or record audio/video, or upload files. + The output of the transcription is displayed in a textbox, and the JSON output in a JSON viewer. + The user can also annotate the output by naming the speakers. + + Args: + layout (dict, optional): A dictionary containing layout information. Defaults to None. + + Returns: + gr.Blocks: A gradio Blocks object representing the interface. 
+ """ with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: # Define components diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py index 487cade..fe636a0 100644 --- a/scraibe/app/multi.py +++ b/scraibe/app/multi.py @@ -1,14 +1,30 @@ """ -This file contains the functions which are related to monitoring the actual app usage. -Therefore, the app is to be more efficient in the usage of the resources. -By for example, unloading or reloading the model. -""" +This module contains functions for managing and optimizing the resource usage of the application. +The functions in this module monitor the application's usage and make adjustments to improve efficiency. +This includes managing the loading and unloading of the model based on the application's activity. +This dynamic management of resources helps to ensure that the application uses only the resources it needs, +improving overall performance and reducing unnecessary resource consumption. + +Functions: + clear_queue(queue): Clears all items from the queue. + model_worker(model_params, request_queue, last_active_time, + response_queue, loaded_event, running_event, *args, **kwargs): Manages the model worker process. + +Modules: + time: Provides various time-related functions. + gc: Provides an interface to the garbage collector. + multiprocessing: Provides support for parallel execution of code. + torch: Provides tensor computation and deep learning functionality. + gradio: Provides a simple way to create interactive UIs for Python functions. + scraibe.autotranscript: Provides automatic transcription functionality. + .stg: Contains the GradioTranscriptionInterface class. 
+""" import time import gc -from typing import Union +from typing import Union, Any import multiprocessing import torch @@ -24,12 +40,27 @@ def clear_queue(queue): continue def model_worker(model_params : Union[Scraibe, dict], - request_queue, - last_active_time, - response_queue, - loaded_event, - running_event, - *args, **kwargs): + request_queue: multiprocessing.Queue, + last_active_time: multiprocessing.Value, + response_queue: multiprocessing.Queue, + loaded_event: multiprocessing.Event, + running_event: multiprocessing.Event, + *args: Any, **kwargs: Any) -> None: + """ + Manages the model worker process. + + The model worker process is responsible for running the model and returning the results. + + Args: + model_params (Union[Scraibe, dict]): The parameters for the Scraibe model. + request_queue (multiprocessing.Queue): The queue for incoming requests. + last_active_time (multiprocessing.Value): The last time the model was active. + response_queue (multiprocessing.Queue): The queue for outgoing responses. + loaded_event (multiprocessing.Event): An event that signals when the model is loaded. + running_event (multiprocessing.Event): An event that signals when the model is running. + *args: Additional arguments. + **kwargs: Additional keyword arguments. + """ loaded_event.set() @@ -68,23 +99,49 @@ def model_worker(model_params : Union[Scraibe, dict], clear_queue(response_queue) loaded_event.clear() -def start_model_worker(model_params, - request_queue, - last_active_time, - response_queue, - loaded_event, - running_event, - *args, **kwargs): +def start_model_worker(model_params: Union[Scraibe, dict], + request_queue: multiprocessing.Queue, + last_active_time: multiprocessing.Value, + response_queue: multiprocessing.Queue, + loaded_event: multiprocessing.Event, + running_event: multiprocessing.Event, + *args: Any, **kwargs: Any) -> multiprocessing.Process: + """ + Starts the model worker process. 
+ + Args: + model_params (Union[Scraibe, dict]): The parameters for the Scraibe model. + request_queue (multiprocessing.Queue): The queue for incoming requests. + last_active_time (multiprocessing.Value): The last time the model was active. + response_queue (multiprocessing.Queue): The queue for outgoing responses. + loaded_event (multiprocessing.Event): An event that signals when the model is loaded. + running_event (multiprocessing.Event): An event that signals when the model is running. + *args: Additional arguments. + **kwargs: Additional keyword arguments. + + Returns: + multiprocessing.Process: The model worker process. + """ context = multiprocessing.get_context('spawn') model_process = context.Process(target=model_worker, args=(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args), kwargs=kwargs) model_process.start() return model_process -def timer_thread(request_queue, - last_active_time, - loaded_event, - running_event, - timeout): +def timer_thread(request_queue: multiprocessing.Queue, + last_active_time: multiprocessing.Value, + loaded_event: multiprocessing.Event, + running_event: multiprocessing.Event, + timeout: int) -> None: + """ + Monitors the model worker process and stops it after a period of inactivity. + + Args: + request_queue (multiprocessing.Queue): The queue for incoming requests. + last_active_time (multiprocessing.Value): The last time the model was active. + loaded_event (multiprocessing.Event): An event that signals when the model is loaded. + running_event (multiprocessing.Event): An event that signals when the model is running. + timeout (int): The period of inactivity after which the model worker process is stopped. 
+ """ while True: time.sleep(timeout) diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py index 1b9caf7..4ce31a1 100644 --- a/scraibe/app/stg.py +++ b/scraibe/app/stg.py @@ -1,41 +1,65 @@ """ -stg - scraibe to gradio interface +stg - Scraibe to Gradio Interface -This file contains the code for the scraibe to gradio interface. -It makes adds gradio interactions to the scraibe class in the back. +This module provides an interface between the Scraibe transcription system and the Gradio user interface. +It defines a class, GradioTranscriptionInterface, that wraps the Scraibe model and provides methods for performing transcription tasks through the Gradio UI. +Modules: + json: Used for encoding and decoding JSON data. + gradio as gr: Used for creating the Gradio UI. + tqdm: Used for displaying progress bars. + scraibe.app.global_var as gv: Contains global variables for the Scraibe app. """ - import json import gradio as gr from tqdm import tqdm +from typing import Any, Dict, Union, Tuple, List + -import scraibe.app.global_var as gv class GradioTranscriptionInterface: """ - Interface handling the interaction between Gradio UI and the Audio Transcription system. + A class that provides an interface between the Gradio UI and the Scraibe transcription system. + + This class wraps the Scraibe model and provides methods for performing transcription tasks through the Gradio UI. + These tasks include auto transcription, transcription, and diarisation. + + Attributes: + model (Scraibe): The Scraibe model for performing transcription tasks. """ - def __init__(self, model): + def __init__(self, model) -> None: """ - Initializes the GradioTranscriptionInterface with a transcription model. + Initializes the GradioTranscriptionInterface with a Scraibe model. Args: - model (Scraibe): Model responsible for audio transcription tasks. + model (Scraibe): The Scraibe model for performing transcription tasks. + *args (Any): Additional positional arguments. 
+ **kwargs (Dict[str, Any]): Additional keyword arguments. """ - self.model = model + + self.model = model - def autotranscribe(self, source, - num_speakers : int, - translate : bool, - language : str,*args ,**kwargs): + def autotranscribe(self, source: Union[str, List[str]], + num_speakers: int, + translate: bool, + language: str, + *args: Any, **kwargs: Dict[str, Any]) -> Tuple[str, Union[str, dict]]: """ - Shortcut method for the Scraibe task. + Performs auto transcription on the given source. + + Args: + source (Union[str, List[str]]): The source to transcribe. This can be a string representing a single source, + or a list of strings representing multiple sources. + num_speakers (int): The number of speakers in the source. + translate (bool): Whether to translate the transcription. + language (str): The language of the source. + *args (Any): Additional positional arguments. + **kwargs (Dict[str, Any]): Additional keyword arguments. Returns: - tuple: Transcribed text (str), JSON output (dict) + Tuple[str, Union[str, dict]]: A tuple containing the transcribed text (str) and the JSON output (str or dict). """ _kwargs = { @@ -82,12 +106,23 @@ class GradioTranscriptionInterface: raise gr.Error("Please provide a valid audio file.") - def transcribe(self, source, translate, language,*args ,**kwargs): + def transcribe(self, source: Union[str, List[str]], + translate: bool, + language: str, + *args: Any, **kwargs: Dict[str, Any]) -> str: """ - Shortcut method for the Transcribe task. + Performs transcription on the given source. + + Args: + source (Union[str, List[str]]): The source to transcribe. + This can be a string representing a single source, or a list of strings representing multiple sources. + translate (bool): Whether to translate the transcription. + language (str): The language of the source. + *args (Any): Additional positional arguments. + **kwargs (Dict[str, Any]): Additional keyword arguments. Returns: - str: Transcribed text. 
+ str: The transcribed text. """ _kwargs = { @@ -118,13 +153,24 @@ class GradioTranscriptionInterface: else: raise gr.Error("Please provide a valid audio file.") - def diarisation(self, source, num_speakers, *args ,**kwargs): + def diarisation(self, source: Union[str, List[str]], + num_speakers: int, + *args: Any, **kwargs: Dict[str, Any]) -> str: """ - Shortcut method for the Diarisation task. + Performs diarisation on the given source. + + Args: + source (Union[str, List[str]]): The source to perform diarisation on. + This can be a string representing a single source, + or a list of strings representing multiple sources. + num_speakers (int): The number of speakers in the source. + *args (Any): Additional positional arguments. + **kwargs (Dict[str, Any]): Additional keyword arguments. Returns: - str: JSON output of diarisation result. + str: The JSON output of the diarisation result. """ + _kwargs = { "num_speakers": num_speakers if num_speakers != 0 else None, @@ -160,16 +206,16 @@ class GradioTranscriptionInterface: else: gr.Error("Please provide a valid audio file.") - def get_task_from_str(self, task): + def get_task_from_str(self, task: str) -> callable: + """ + Returns the corresponding task function based on the given task string. + + Args: + task (str): The task string. This can be one of the following: 'Auto Transcribe', 'Transcribe', 'Diarisation'. + + Returns: + callable: The corresponding task function. """ - Returns the coresponing task function based on the task string. - - params: - task (str): Task string. 
Can be one of the following: - - 'Auto Transcribe' - - 'Transcribe' - - 'Diarisation' - """ if task == 'Auto Transcribe': return self.autotranscribe diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py index 816bc4c..593db98 100644 --- a/scraibe/app/utils.py +++ b/scraibe/app/utils.py @@ -1,22 +1,51 @@ +""" +utils.py + +This module contains two classes, ConfigLoader and AppConfig, which are used to manage application-specific configuration settings. + +The ConfigLoader class provides methods for loading a configuration file, applying overrides, and restoring default values for specified keys. It also includes methods for recursively updating nested keys and getting the default configuration. + +The AppConfig class extends ConfigLoader and provides additional methods for setting global variables, launch options, and layout options from the configuration. It also includes methods for checking and setting file paths, and getting layout options. + +Classes: + ConfigLoader: Manages application-specific configuration settings. + AppConfig: Extends ConfigLoader to provide additional methods for managing application-specific configuration settings. +""" import os import warnings import yaml +from typing import Any, Dict, Optional import scraibe.app.global_var as gv CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) class ConfigLoader: - def __init__(self, config): - - self.config = config - - def restore_defaults_for_keys(self, *args): - """ - Restores specified keys to their default values, including nested keys. + """A class that extends ConfigLoader to manage application-specific configuration settings. + + This class provides methods for setting global variables, launch options, and layout options from the configuration. + + Attributes: + config (Dict[str, Any]): The current configuration settings. + launch (Dict[str, Any]): The launch configuration settings. + model (Dict[str, Any]): The model configuration settings. 
+ advanced (Dict[str, Any]): The advanced configuration settings. + queue (Dict[str, Any]): The queue configuration settings. + layout (Dict[str, Any]): The layout configuration settings. + """ + def __init__(self, config: Dict[str, Any]): + """Initializes a new instance of the ConfigLoader class. Args: - keys (list): A list of keys or paths to keys (for nested dictionaries) to restore to default values. + config (dict): The configuration dictionary. + """ + self.config = config + + def restore_defaults_for_keys(self, *args: str): + """Restores specified keys to their default values, including nested keys. + + Args: + *args (str): A list of keys or paths to keys (for nested dictionaries) to restore to default values. Each key or path should be a list of keys leading to the desired key. """ default_config = self.get_default_config() @@ -27,16 +56,15 @@ class ConfigLoader: @classmethod - def load_config(cls, yaml_path = None, **kwargs): - """ - Load the configuration file and apply overrides. + def load_config(cls, yaml_path: Optional[str] = None, **kwargs: Any) -> 'ConfigLoader': + """Load the configuration file and apply overrides. Args: - yaml_path (str): Path to the YAML file containing overrides. + yaml_path (str, optional): Path to the YAML file containing overrides. **kwargs: Additional overrides as keyword arguments. Returns: - Config: A Config object with the loaded configuration. + ConfigLoader: A ConfigLoader object with the loaded configuration. """ # Load the original configuration @@ -54,8 +82,14 @@ class ConfigLoader: return cls(config) @staticmethod - def apply_overrides(orig_dict, override_dict, specific=None): - """ Recursively apply overrides to the configuration, only for specific keys. """ + def apply_overrides(orig_dict: Dict[str, Any], override_dict: Dict[str, Any], specific: Optional[str] = None): + """Recursively apply overrides to the configuration, only for specific keys. + + Args: + orig_dict (Dict[str, Any]): The original dictionary. 
+ override_dict (Dict[str, Any]): The override dictionary. + specific (str, optional): The specific key to override. + """ for key, value in override_dict.items(): if isinstance(value, dict): @@ -80,7 +114,16 @@ class ConfigLoader: @staticmethod def update_nested_key(d, key, value): - """ Recursively search and update the key in nested dictionary. """ + """Recursively search and update the key in nested dictionary. + + Args: + d (Dict[str, Any]): The dictionary. + key (str): The key to update. + value (Any): The new value. + + Returns: + bool: True if the key was found and updated, False otherwise. + """ if key in d: d[key] = value @@ -92,16 +135,35 @@ class ConfigLoader: @staticmethod def get_default_config(): - """ Return the default configuration. """ + """Return the default configuration. + + Returns: + Dict[str, Any]: The default configuration. + """ with open(gv.DEFAULT_APP_CONIFG_PATH , 'r') as file: config = yaml.safe_load(file) return config class AppConfig(ConfigLoader): - - def __init__(self, config): - + """A class that extends ConfigLoader to manage application-specific configuration settings. + + This class provides methods for setting global variables, launch options, and layout options from the configuration. + + Attributes: + config (dict): The current configuration settings. + launch (dict): The launch configuration settings. + model (dict): The model configuration settings. + advanced (dict): The advanced configuration settings. + queue (dict): The queue configuration settings. + layout (dict): The layout configuration settings. + """ + def __init__(self, config : Dict[str, Any]): + """Initializes a new instance of the AppConfig class. + + Args: + config (dict): The configuration dictionary. 
+ """ self.config = config self.set_global_vars_from_config() @@ -114,23 +176,28 @@ class AppConfig(ConfigLoader): self.queue = self.config.get("queue") self.layout = self.config.get("layout") - def set_global_vars_from_config(self): - """ - Sets the global variables from a configuration dictionary. - + def set_global_vars_from_config(self) -> None: + """Sets the global variables from a configuration dictionary. + Args: config (dict): A dictionary containing the parameters for the model. Modify the default parameters in the config.yml file. - + Returns: None - """ gv.MODEL_PARAMS = self.config.get('model') gv.TIMEOUT = self.config.get("advanced").get('timeout') - def set_launch_options(self): - + def set_launch_options(self) -> None: + """Sets the launch options from a configuration dictionary. + + Args: + None + + Returns: + None + """ launch_options = self.config.get("launch") if launch_options.get('auth').pop('auth_enabled'): @@ -139,13 +206,28 @@ class AppConfig(ConfigLoader): else: self.config['launch']['auth'] = None - def set_layout_options(self): + def set_layout_options(self) -> None: + """Sets the layout options from a configuration dictionary. + + Args: + None + + Returns: + None + """ self.config['layout']['header'] = self.check_and_set_path(self.config['layout'], 'header') self.config['layout']['footer'] = self.check_and_set_path(self.config['layout'], 'footer') self.config['layout']['logo'] = self.check_and_set_path(self.config['layout'], 'logo') - def get_layout(self): - + def get_layout(self) -> Dict[str, str]: + """Gets the layout options from a configuration dictionary. + + Args: + None + + Returns: + dict: A dictionary containing the header and footer layout options. 
+ """ if not os.path.exists(self.config['layout']['header']) and \ self.config['layout']['header'] == "scraibe/app/header.html": @@ -189,10 +271,16 @@ class AppConfig(ConfigLoader): 'footer' : footer} @staticmethod - def check_and_set_path(config_item, key): - """ - Check if the file exists at the given path. If not, try with CURRENT_PATH. + def check_and_set_path(config_item: dict, key: str) -> Optional[str]: + """Check if the file exists at the given path. If not, try with CURRENT_PATH. Raise FileNotFoundError if the file still doesn't exist. + + Args: + config_item (dict): The configuration item. + key (str): The key to check in the configuration item. + + Returns: + str: The path to the file if it exists, None otherwise. """ _current_path = os.path.dirname(os.path.realpath(__file__)) # Define your CURRENT_PATH @@ -207,10 +295,4 @@ class AppConfig(ConfigLoader): else: config_item[key] = new_path - return config_item[key] - - - - - - \ No newline at end of file + return config_item[key] \ No newline at end of file From c70e66ead07b45e75da7dd87630eefe3502a5717 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 12 Feb 2024 11:43:34 +0100 Subject: [PATCH 36/42] removed App here --- scraibe/app/__init__.py | 7 - scraibe/app/app.py | 100 ------------ scraibe/app/app_starter.py | 70 --------- scraibe/app/config.yml | 47 ------ scraibe/app/global_var.py | 37 ----- scraibe/app/header.html | 66 -------- scraibe/app/interactions.py | 153 ------------------ scraibe/app/interface.py | 146 ------------------ scraibe/app/logo.svg | 37 ----- scraibe/app/multi.py | 151 ------------------ scraibe/app/stg.py | 229 --------------------------- scraibe/app/utils.py | 298 ------------------------------------ 12 files changed, 1341 deletions(-) delete mode 100644 scraibe/app/__init__.py delete mode 100644 scraibe/app/app.py delete mode 100644 scraibe/app/app_starter.py delete mode 100644 scraibe/app/config.yml delete mode 100644 scraibe/app/global_var.py delete mode 100644 
scraibe/app/header.html delete mode 100644 scraibe/app/interactions.py delete mode 100644 scraibe/app/interface.py delete mode 100644 scraibe/app/logo.svg delete mode 100644 scraibe/app/multi.py delete mode 100644 scraibe/app/stg.py delete mode 100644 scraibe/app/utils.py diff --git a/scraibe/app/__init__.py b/scraibe/app/__init__.py deleted file mode 100644 index bdf5464..0000000 --- a/scraibe/app/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .multi import * -from .interface import * -from .stg import * -from .interactions import * -from .global_var import * -from .utils import * -from .app import * \ No newline at end of file diff --git a/scraibe/app/app.py b/scraibe/app/app.py deleted file mode 100644 index 76b189a..0000000 --- a/scraibe/app/app.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -Gradio App ----------- - -This module provides an interface to transcribe audio files using the -Scraibe model. Users can either upload an audio file or record their speech -live for transcription. The application supports multiple languages and provides -options to specify the number of speakers and the language of the audio. It also -enables efficient management of resources by loading and unloading AI models -based on usage. - -The configuration is managed via a 'config.yml' file, which allows customization -of various aspects of the application, including the Gradio interface, queue -management, and model parameters. - -Configuration Sections in 'config.yml': -- launch: Settings for launching the interface, such as server port, authentication, SSL configuration. -- queue: Configuration for managing request handling and concurrency. -- layout: Customization options for the interface layout, like headers, footers, and logos. -- model: Specifications for different AI models used in transcription. -- advanced: Advanced settings, including session timeout duration. 
- -Note: - The .queue function of the Gradio interface is currently experiencing issues - and might not work as expected. - -Usage: - Run this script to start the Gradio web interface for audio transcription. -""" - - - -#### -# Gradio Interface -#### - -from threading import Thread - -import scraibe.app.global_var as gv -from .interface import gradio_Interface -from .multi import * -from .utils import * - - -def app(config : str = None, **kwargs): - """ - Launches the Gradio interface for audio transcription. - - Initializes the Gradio web interface with settings from a YAML configuration file - and/or keyword arguments. The function manages AI models, handling their loading - into RAM and unloading after a session or specified timeout. - - The `kwargs` are used to override or supplement values from the `config.yml` file. - They should follow the structure of `config.yml`, which includes sections like - 'launch', 'queue', 'layout', 'model', and 'advanced'. - - Args: - config (str): Path to the YAML configuration file. Default settings are used - if not provided. - **kwargs: Keyword arguments corresponding to the configuration sections. Each - argument should be a dictionary reflecting the structure of its - respective section in `config.yml`. 
- - Returns: - None - """ - - # Load and override configuration from the YAML file with kwargs - - config = AppConfig.load_config(config, **kwargs) - - - gv.MODEL_PROCESS = start_model_worker(gv.MODEL_PARAMS, - gv.REQUEST_QUEUE, - gv.LAST_ACTIVE_TIME, - gv.RESPONSE_QUEUE, - gv.LOADED_EVENT, - gv.RUNNING_EVENT) - - # Set the timer thread to manage model loading and unloading - timer = Thread(target=timer_thread, args=(gv.REQUEST_QUEUE, - gv.LAST_ACTIVE_TIME, - gv.LOADED_EVENT, - gv.RUNNING_EVENT, - gv.TIMEOUT), daemon=True) - - # Set the layout for the Gradio interface - layout = config.get_layout() - - # start the timer thread - timer.start() - - print("Starting Gradio Web Interface") - - # Launch the Gradio interface - gradio_Interface(layout).queue(**config.queue).launch(**config.launch) - - # Wait for the timer thread to finish - timer.join() - gv.MODEL_PROCESS.join() \ No newline at end of file diff --git a/scraibe/app/app_starter.py b/scraibe/app/app_starter.py deleted file mode 100644 index b1597f0..0000000 --- a/scraibe/app/app_starter.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Starts the Gradio interface for audio transcription with optional configuration. - -This script, app_starter.py, initializes and runs a Gradio interface for audio -transcription tasks. It allows users to provide a configuration file for custom -settings. If no configuration file is specified, default settings are applied. -The script is designed to support multiprocessing for improved performance. - -Attributes: - args (argparse.Namespace): Parsed command line arguments. - -Example: - To run the script with custom server configuration and keyword arguments: - $ python app_starter.py --server-config path/to/config.yml --server-kwargs key1=val1 key2=val2 -""" - -import multiprocessing -from argparse import ArgumentParser, Action - -class ParseKwargs(Action): - """Custom action for argparse to parse keyword arguments for Gradio app configuration. 
- - This action parses a series of keyword arguments and converts them into a - dictionary, which is then used to configure the Gradio application. It - supports dynamic types by attempting to evaluate the argument values. - - Attributes: - dest (str): The name of the attribute to be added to the object returned by parse_args(). - """ - def __call__(self, parser, namespace, values, option_string=None): - """Parses keyword arguments and updates the namespace with these arguments as a dictionary. - - For each value provided, this method splits the string on the '=' character - to separate keys and values, attempting to evaluate the values for Python - literals. If evaluation fails, the raw string is used as the value. - - Args: - parser (ArgumentParser): The ArgumentParser object that called this method. - namespace (Namespace): An argparse.Namespace object that will be returned by parse_args(). - values (list of str): List of strings, each representing a key-value pair in 'key=value' format. - option_string (Optional[str]): The option string that was used to invoke this action. - - Raises: - ValueError: If any string in values does not contain the '=' character, indicating an invalid format. 
- """ - setattr(namespace, self.dest, dict()) - for value in values: - key, value = value.split('=') - try: - value = eval(value) - except: - pass - getattr(namespace, self.dest)[key] = value - -parser = ArgumentParser() - -parser.add_argument("--server-config", type=str, default= None, - help="Path to the configy.yml file.") - -parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={}, - help='Keyword arguments for the Gradio app.') - -args = parser.parse_args() - -if __name__ == '__main__': - - multiprocessing.set_start_method('spawn') - - from scraibe.app.app import app - - app(config = args.server_config, **args.server_kwargs) \ No newline at end of file diff --git a/scraibe/app/config.yml b/scraibe/app/config.yml deleted file mode 100644 index 8b25af5..0000000 --- a/scraibe/app/config.yml +++ /dev/null @@ -1,47 +0,0 @@ -launch: - # The following are the default values for the launch configuration - # for more informations look at https://www.gradio.app/docs/interface - server_port: 7860 - server_name: 0.0.0.0 - inline: false - inbrowser: false - share : false - debug : false - max_threads: 40 - quiet: false - auth: - auth_enabled: false - auth_username: admin - auth_password: admin - auth_message: null - prevent_thread_lock : false - show_error : false - show_tips : false - favicon_path : null - ssl_keyfile : null - ssl_certfile : null - ssl_keyfile_password : null - ssl_verify : false - show_api : false - allowed_paths : null - blocked_paths : null - root_path : '' - app_kwargs : null - -queue: - # The following are the default values for the queue configuration - # for more informations look at hhttps://www.gradio.app/docs/interface - concurrency_count : 1 - status_update_rate : 'auto' - api_open : null - max_size : null - -layout: - header: scraibe/app/header.html - footer: null - logo: scraibe/app/logo.svg -model: - whisper_model : null - dia_model: null -advanced: - timeout: 300 #seconds e.g. 
5 minutes diff --git a/scraibe/app/global_var.py b/scraibe/app/global_var.py deleted file mode 100644 index b1c1c80..0000000 --- a/scraibe/app/global_var.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -global_var.py - -This module stores global variables for the app. - -Global variables: - REQUEST_QUEUE (multiprocessing.Queue): A queue to store audio file paths as strings. - RESPONSE_QUEUE (multiprocessing.Queue): A queue to store transcriptions as strings. - LAST_ACTIVE_TIME (multiprocessing.Value): A value to store the time of the last activity. - LOADED_EVENT (multiprocessing.Event): An event to indicate when the model is loaded. - RUNNING_EVENT (multiprocessing.Event): An event to indicate when the model is running. - MODEL_PARAMS (Optional[dict]): A dictionary to store the model parameters. - MODEL_PROCESS (Optional[multiprocessing.Process]): A process to handle the model globally. - LAST_USED (float): A float to track the time of the last user activity. - TIMEOUT (Optional[int]): An integer to store the timeout in seconds. - DEFAULT_APP_CONIFG_PATH (str): A string to store the default path to the app configuration file. 
-""" - -import multiprocessing -import os -import time -from typing import Optional - -REQUEST_QUEUE: multiprocessing.Queue = multiprocessing.Queue() # audio file path as string -RESPONSE_QUEUE: multiprocessing.Queue = multiprocessing.Queue() # transcription as string -LAST_ACTIVE_TIME: multiprocessing.Value = multiprocessing.Value('d', time.time()) # time of last activity -LOADED_EVENT: multiprocessing.Event = multiprocessing.Event() # model loaded event -RUNNING_EVENT: multiprocessing.Event = multiprocessing.Event() # model running event - -MODEL_PARAMS: Optional[dict] = None # model parameters -MODEL_PROCESS: Optional[multiprocessing.Process] = None # model process to handle globally - -# Global variable to track user activity -LAST_USED: float = time.time() -TIMEOUT: Optional[int] = None # seconds - -DEFAULT_APP_CONIFG_PATH: str = os.path.join(os.path.dirname(os.path.realpath(__file__)), "config.yml") \ No newline at end of file diff --git a/scraibe/app/header.html b/scraibe/app/header.html deleted file mode 100644 index 4b12136..0000000 --- a/scraibe/app/header.html +++ /dev/null @@ -1,66 +0,0 @@ - - - - - -
-

ScrAIbe

-
- - - -
-
-
-

- Upload, record, or provide a video with audio for transcription. Our toolkit is designed to transcribe content from multiple languages accurately. The integrated speaker diarisation feature identifies different speakers, ensuring a smooth transcription experience. For optimal results, indicate the number of speakers and the original language of the content. -

-

What would you like to do next?

-
diff --git a/scraibe/app/interactions.py b/scraibe/app/interactions.py deleted file mode 100644 index 9206e75..0000000 --- a/scraibe/app/interactions.py +++ /dev/null @@ -1,153 +0,0 @@ -""" -This file contains ervery function that will be called when the user interacts with the -UI like pressing a button or uploading a file. -""" - -import gradio as gr -import scraibe.app.global_var as gv -from scraibe import Transcript -from .multi import start_model_worker - -def select_task(choice): - # tell the app that it is still in use - if choice == 'Auto Transcribe': - - return (gr.update(visible = True), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Transcribe': - - return (gr.update(visible = False), - gr.update(visible = True), - gr.update(visible = True)) - - - elif choice == 'Diarisation': - - return (gr.update(visible = True), - gr.update(visible = False), - gr.update(visible = False)) - -def select_origin(choice): - - # tell the app that it is still in use - if choice == "Upload Audio": - - return (gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Audio": - - return (gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Upload Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None)) - - elif choice == "Record Video": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True), - gr.update(visible = False, value = None)) 
- - elif choice == "File or Files": - - return (gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = False, value = None), - gr.update(visible = True)) - -def run_scraibe(task, - num_speakers, - translate, - language, - audio1, - audio2, - video1, - video2, - file_in, - progress = gr.Progress(track_tqdm=False)): - - # get *args which are not None - if gv.MODEL_PROCESS is None or not gv.MODEL_PROCESS.is_alive(): - #progress(0.0, desc='Loading model...') - gv.MODEL_PROCESS = start_model_worker(gv.MODEL_PARAMS, - gv.REQUEST_QUEUE, - gv.LAST_ACTIVE_TIME, - gv.RESPONSE_QUEUE, - gv.LOADED_EVENT, - gv.RUNNING_EVENT) - - # progress(0.1, desc='Starting task...') - source = audio1 or audio2 or video1 or video2 or file_in - - if isinstance(source, list): - source = [s.name for s in source] - if len(source) == 1: - source = source[0] - - config = dict(source = source, - task = task, - num_speakers = num_speakers, - translate = translate, - language = language) - - gv.REQUEST_QUEUE.put(config) - - if task == 'Auto Transcribe': - - out_str , out_json = gv.RESPONSE_QUEUE.get() - - if isinstance(source, str): - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = True), - gr.update(visible = True)) - else: - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Transcribe': - - out = gv.RESPONSE_QUEUE.get() - - return (gr.update(value = out, visible = True), - gr.update(value = None, visible = False), - gr.update(visible = False), - gr.update(visible = False)) - - elif task == 'Diarisation': - - out = gv.RESPONSE_QUEUE.get() - - return (gr.update(value = None, visible = False), - gr.update(value = out, visible = True), - gr.update(visible = False), - gr.update(visible = False)) - -def 
annotate_output(annoation : str, out_json : dict): - # get *args which are not None - - trans = Transcript.from_json(out_json) - trans = trans.annotate(*annoation.split(",")) - - return gr.update(value = str(trans)),gr.update(value = trans.get_json()) - diff --git a/scraibe/app/interface.py b/scraibe/app/interface.py deleted file mode 100644 index bab29a9..0000000 --- a/scraibe/app/interface.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -This module contains the gradio Interface which is used to interact with the user. - -The interface is themed with a soft color scheme, with primary colors of green and orange, and a neutral color of gray. - -A list of languages is also defined in this module, which may be used elsewhere in the application. - -Classes: - Soft: A class from the gradio library used to theme the interface. - -Variables: - theme (gr.themes.Soft): The theme for the gradio interface. - LANGUAGES (list of str): A list of languages supported by the application. -""" - -import gradio as gr - -from .interactions import * -from .stg import * - -theme = gr.themes.Soft( - primary_hue="green", - secondary_hue='orange', - neutral_hue="gray", -) - - -LANGUAGES = [ - "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", - "Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian", - "Czech", "Danish", "Dutch", "English", "Estonian", - "Finnish", "French", "Galician", "German", "Greek", - "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", - "Italian", "Japanese", "Kannada", "Kazakh", "Korean", - "Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi", - "Maori", "Nepali", "Norwegian", "Persian", "Polish", - "Portuguese", "Romanian", "Russian", "Serbian", "Slovak", - "Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog", - "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu", - "Vietnamese", "Welsh" -] - - - - -def gradio_Interface(layout = None,): - """ - Creates a gradio interface for audio transcription. 
- - The interface includes options for the user to select the task, number of speakers, translation, language, and input type. - It also provides options for the user to upload or record audio/video, or upload files. - The output of the transcription is displayed in a textbox, and the JSON output in a JSON viewer. - The user can also annotate the output by naming the speakers. - - Args: - layout (dict, optional): A dictionary containing layout information. Defaults to None. - - Returns: - gr.Blocks: A gradio Blocks object representing the interface. - """ - with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo: - - # Define components - - - if layout.get('header') is not None: - gr.HTML(layout.get('header'), visible= True, show_label=False) - - with gr.Row(): - - with gr.Column(): - - task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task", - value= 'Auto Transcribe') - - num_speakers = gr.Number(value=0, label= "Number of speakers (optional)", - info = "Number of speakers in the audio file. If you don't know,\ - leave it at 0.", visible= True) - - translate = gr.Checkbox(label="Translation", choices=[True, False], value = False, - info="Select 'Yes' to have the output translated into English.", - visible= True) - - language = gr.Dropdown(LANGUAGES, - label="Language (optional)", value = "None", - info="Language of the audio file. 
If you don't know,\ - leave it at None.", visible= True) - - input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" - ,"File or Files"], label="Input Type", value="Upload Audio") - - audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", - interactive= True, visible= True) - audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath", - interactive= True, visible= False) - video1 = gr.Video(source="upload", type="filepath", label="Upload Video", - interactive= True, visible= False) - video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True, - interactive= True, visible= False) - file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) - - submit = gr.Button() - - with gr.Column(): - - out_txt = gr.Textbox(label="Output", - visible= True, show_copy_button=True) - - out_json = gr.JSON(label="JSON Output", - visible= False, show_copy_button=True) - - annoation = gr.Textbox(label="Name your speaker's", - info= "Please provide a list of the speakers arranged \ - in the order in which they appear in the input. Use comma ',' \ - as a seperator. 
Be aware that the first name is given \ - to SPEAKER_00 the second to SPEAKER_01 and so on.", - visible= False, interactive= True) - - annotate = gr.Button(value="Annotate", visible= False, interactive= True) - - if layout.get('footer') is not None: - gr.HTML(layout.get('footer'), visible= True, show_label=False) - - # Define usage of components - input.change(fn=select_origin, inputs=[input], - outputs=[audio1, audio2, video1, video2, file_in]) - - task.change(fn=select_task, inputs=[task], - outputs=[num_speakers, translate, language]) - - translate.change(fn= lambda x : gr.update(value = x), - inputs=[translate], outputs=[translate]) - num_speakers.change(fn= lambda x : gr.update(value = x), - inputs=[num_speakers], outputs=[num_speakers]) - language.change(fn= lambda x : gr.update(value = x), - inputs=[language], outputs=[language]) - - submit.click(fn = run_scraibe, - inputs=[task, num_speakers, translate, language, audio1, - audio2, video1, video2, file_in], - outputs=[out_txt, out_json, annoation, annotate]) - - annotate.click(fn = annotate_output, inputs=[annoation, out_json], - outputs=[out_txt, out_json]) - - return demo \ No newline at end of file diff --git a/scraibe/app/logo.svg b/scraibe/app/logo.svg deleted file mode 100644 index 54d12d7..0000000 --- a/scraibe/app/logo.svg +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/scraibe/app/multi.py b/scraibe/app/multi.py deleted file mode 100644 index fe636a0..0000000 --- a/scraibe/app/multi.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -This module contains functions for managing and optimizing the resource usage of the application. - -The functions in this module monitor the application's usage and make adjustments to improve efficiency. -This includes managing the loading and unloading of the model based on the application's activity. 
-This dynamic management of resources helps to ensure that the application uses only the resources it needs, -improving overall performance and reducing unnecessary resource consumption. - -Functions: - clear_queue(queue): Clears all items from the queue. - model_worker(model_params, request_queue, last_active_time, - response_queue, loaded_event, running_event, *args, **kwargs): Manages the model worker process. - -Modules: - time: Provides various time-related functions. - gc: Provides an interface to the garbage collector. - multiprocessing: Provides support for parallel execution of code. - torch: Provides tensor computation and deep learning functionality. - gradio: Provides a simple way to create interactive UIs for Python functions. - scraibe.autotranscript: Provides automatic transcription functionality. - .stg: Contains the GradioTranscriptionInterface class. -""" - - -import time -import gc -from typing import Union, Any -import multiprocessing -import torch - -from gradio import Warning -from scraibe.autotranscript import Scraibe -from .stg import GradioTranscriptionInterface - -def clear_queue(queue): - while not queue.empty(): - try: - queue.get_nowait() - except queue.Empty: - continue - -def model_worker(model_params : Union[Scraibe, dict], - request_queue: multiprocessing.Queue, - last_active_time: multiprocessing.Value, - response_queue: multiprocessing.Queue, - loaded_event: multiprocessing.Event, - running_event: multiprocessing.Event, - *args: Any, **kwargs: Any) -> None: - """ - Manages the model worker process. - - The model worker process is responsible for running the model and returning the results. - - Args: - model_params (Union[Scraibe, dict]): The parameters for the Scraibe model. - request_queue (multiprocessing.Queue): The queue for incoming requests. - last_active_time (multiprocessing.Value): The last time the model was active. - response_queue (multiprocessing.Queue): The queue for outgoing responses. 
- loaded_event (multiprocessing.Event): An event that signals when the model is loaded. - running_event (multiprocessing.Event): An event that signals when the model is running. - *args: Additional arguments. - **kwargs: Additional keyword arguments. - """ - - loaded_event.set() - - if model_params is None: - _model = Scraibe() - elif type(model_params) is Scraibe: - _model = model_params - elif type(model_params) is dict: - _model = Scraibe(**model_params) - else: - raise TypeError("model must be of type Scraibe, or dict") - - model = GradioTranscriptionInterface(_model) - - while True: - - req = request_queue.get() - - if req == "STOP": - - break - elif type(req) is dict: - runner = model.get_task_from_str(req.pop("task")) - running_event.set() - transcription = runner(**req) - running_event.clear() - response_queue.put(transcription) - last_active_time.value = time.time() - else: - raise TypeError("request must be of type dict") - - del model - torch.cuda.empty_cache() - gc.collect() - clear_queue(request_queue) - clear_queue(response_queue) - loaded_event.clear() - -def start_model_worker(model_params: Union[Scraibe, dict], - request_queue: multiprocessing.Queue, - last_active_time: multiprocessing.Value, - response_queue: multiprocessing.Queue, - loaded_event: multiprocessing.Event, - running_event: multiprocessing.Event, - *args: Any, **kwargs: Any) -> multiprocessing.Process: - """ - Starts the model worker process. - - Args: - model_params (Union[Scraibe, dict]): The parameters for the Scraibe model. - request_queue (multiprocessing.Queue): The queue for incoming requests. - last_active_time (multiprocessing.Value): The last time the model was active. - response_queue (multiprocessing.Queue): The queue for outgoing responses. - loaded_event (multiprocessing.Event): An event that signals when the model is loaded. - running_event (multiprocessing.Event): An event that signals when the model is running. - *args: Additional arguments. 
- **kwargs: Additional keyword arguments. - - Returns: - multiprocessing.Process: The model worker process. - """ - context = multiprocessing.get_context('spawn') - model_process = context.Process(target=model_worker, args=(model_params, request_queue, last_active_time, response_queue,loaded_event, running_event, *args), kwargs=kwargs) - model_process.start() - return model_process - -def timer_thread(request_queue: multiprocessing.Queue, - last_active_time: multiprocessing.Value, - loaded_event: multiprocessing.Event, - running_event: multiprocessing.Event, - timeout: int) -> None: - """ - Monitors the model worker process and stops it after a period of inactivity. - - Args: - request_queue (multiprocessing.Queue): The queue for incoming requests. - last_active_time (multiprocessing.Value): The last time the model was active. - loaded_event (multiprocessing.Event): An event that signals when the model is loaded. - running_event (multiprocessing.Event): An event that signals when the model is running. - timeout (int): The period of inactivity after which the model worker process is stopped. - """ - while True: - time.sleep(timeout) - - if time.time() - last_active_time.value > timeout and loaded_event.is_set() and not running_event.is_set(): - print(f"No activity for the last {timeout} seconds. Stopping the model worker.", flush=True) - request_queue.put("STOP") - Warning("Model worker stopped due to inactivity.") \ No newline at end of file diff --git a/scraibe/app/stg.py b/scraibe/app/stg.py deleted file mode 100644 index 4ce31a1..0000000 --- a/scraibe/app/stg.py +++ /dev/null @@ -1,229 +0,0 @@ -""" -stg - Scraibe to Gradio Interface - -This module provides an interface between the Scraibe transcription system and the Gradio user interface. -It defines a class, GradioTranscriptionInterface, that wraps the Scraibe model and provides methods for performing transcription tasks through the Gradio UI. - -Modules: - json: Used for encoding and decoding JSON data. 
- gradio as gr: Used for creating the Gradio UI. - tqdm: Used for displaying progress bars. - scraibe.app.global_var as gv: Contains global variables for the Scraibe app. -""" -import json -import gradio as gr -from tqdm import tqdm -from typing import Any, Dict, Union, Tuple, List - - - - -class GradioTranscriptionInterface: - """ - A class that provides an interface between the Gradio UI and the Scraibe transcription system. - - This class wraps the Scraibe model and provides methods for performing transcription tasks through the Gradio UI. - These tasks include auto transcription, transcription, and diarisation. - - Attributes: - model (Scraibe): The Scraibe model for performing transcription tasks. - """ - - def __init__(self, model) -> None: - """ - Initializes the GradioTranscriptionInterface with a Scraibe model. - - Args: - model (Scraibe): The Scraibe model for performing transcription tasks. - *args (Any): Additional positional arguments. - **kwargs (Dict[str, Any]): Additional keyword arguments. - """ - - self.model = model - - def autotranscribe(self, source: Union[str, List[str]], - num_speakers: int, - translate: bool, - language: str, - *args: Any, **kwargs: Dict[str, Any]) -> Tuple[str, Union[str, dict]]: - """ - Performs auto transcription on the given source. - - Args: - source (Union[str, List[str]]): The source to transcribe. This can be a string representing a single source, - or a list of strings representing multiple sources. - num_speakers (int): The number of speakers in the source. - translate (bool): Whether to translate the transcription. - language (str): The language of the source. - *args (Any): Additional positional arguments. - **kwargs (Dict[str, Any]): Additional keyword arguments. - - Returns: - Tuple[str, Union[str, dict]]: A tuple containing the transcribed text (str) and the JSON output (str or dict). 
- """ - - _kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - "language": language if language != "None" else None, - "task": 'translate' if translate else None - } - if isinstance(source, str): - try: - result = self.model.autotranscribe(source, **_kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - - return str(result), result.get_json() - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - try: - res = self.model.autotranscribe(s, **_kwargs) - except ValueError: - _name = s.split("/")[-1] - res = f"NO TRANSCRIPT FOUND FOR {_name}" - gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") - result.append(res) - - out = '' - out_dict = {} - for i, r in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(r) - out += "\n\n" - - if isinstance(r, str): - out_dict[source_names[i]] = r - else: - out_dict[source_names[i]] = r.get_dict() - - return out, json.dumps(out_dict, indent=4) - - else: - raise gr.Error("Please provide a valid audio file.") - - - def transcribe(self, source: Union[str, List[str]], - translate: bool, - language: str, - *args: Any, **kwargs: Dict[str, Any]) -> str: - """ - Performs transcription on the given source. - - Args: - source (Union[str, List[str]]): The source to transcribe. - This can be a string representing a single source, or a list of strings representing multiple sources. - translate (bool): Whether to translate the transcription. - language (str): The language of the source. - *args (Any): Additional positional arguments. - **kwargs (Dict[str, Any]): Additional keyword arguments. - - Returns: - str: The transcribed text. 
- """ - - _kwargs = { - "language": language if language != "None" else None, - "task": 'translate' if translate == "Yes" else None - } - - if isinstance(source, str): - result = self.model.transcribe(source, **_kwargs) - - return str(result) - - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): - res = self.model.transcribe(s, **_kwargs) - result.append(res) - - out = '' - for i, res in enumerate(result): - out += f"TRANSCRIPT FOR {source_names[i]}:\n\n" - out += str(res) - out += "\n\n" - - return out - - else: - raise gr.Error("Please provide a valid audio file.") - - def diarisation(self, source: Union[str, List[str]], - num_speakers: int, - *args: Any, **kwargs: Dict[str, Any]) -> str: - """ - Performs diarisation on the given source. - - Args: - source (Union[str, List[str]]): The source to perform diarisation on. - This can be a string representing a single source, - or a list of strings representing multiple sources. - num_speakers (int): The number of speakers in the source. - *args (Any): Additional positional arguments. - **kwargs (Dict[str, Any]): Additional keyword arguments. - - Returns: - str: The JSON output of the diarisation result. - """ - - - _kwargs = { - "num_speakers": num_speakers if num_speakers != 0 else None, - } - - if isinstance(source, str): - try: - result = self.model.diarization(source, **_kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. 
\ - Please try again!") - - return json.dumps(result, indent=2) - elif isinstance(source, list): - source_names = [s.split("/")[-1] for s in source] - result = [] - for s in tqdm(source, total=len(source),desc = "Performing diarisation"): - try: - res = self.model.diarization(s, **_kwargs) - except ValueError: - - res = f"NO DIARISATION FOUND FOR {s}" - gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") - result.append(res) - - out = {} - - for i, res in enumerate(result): - out[source_names[i]] = res - - return json.dumps(out, indent=4) - - else: - gr.Error("Please provide a valid audio file.") - - def get_task_from_str(self, task: str) -> callable: - """ - Returns the corresponding task function based on the given task string. - - Args: - task (str): The task string. This can be one of the following: 'Auto Transcribe', 'Transcribe', 'Diarisation'. - - Returns: - callable: The corresponding task function. - """ - - if task == 'Auto Transcribe': - return self.autotranscribe - elif task == 'Transcribe': - return self.transcribe - elif task == 'Diarisation': - return self.diarisation - else: - raise ValueError("Invalid task string.") - - diff --git a/scraibe/app/utils.py b/scraibe/app/utils.py deleted file mode 100644 index 593db98..0000000 --- a/scraibe/app/utils.py +++ /dev/null @@ -1,298 +0,0 @@ -""" -utils.py - -This module contains two classes, ConfigLoader and AppConfig, which are used to manage application-specific configuration settings. - -The ConfigLoader class provides methods for loading a configuration file, applying overrides, and restoring default values for specified keys. It also includes methods for recursively updating nested keys and getting the default configuration. - -The AppConfig class extends ConfigLoader and provides additional methods for setting global variables, launch options, and layout options from the configuration. It also includes methods for checking and setting file paths, and getting layout options. 
- -Classes: - ConfigLoader: Manages application-specific configuration settings. - AppConfig: Extends ConfigLoader to provide additional methods for managing application-specific configuration settings. -""" -import os -import warnings -import yaml -from typing import Any, Dict, Optional - -import scraibe.app.global_var as gv - -CURRENT_PATH = os.path.dirname(os.path.realpath(__file__)) - -class ConfigLoader: - """A class that extends ConfigLoader to manage application-specific configuration settings. - - This class provides methods for setting global variables, launch options, and layout options from the configuration. - - Attributes: - config (Dict[str, Any]): The current configuration settings. - launch (Dict[str, Any]): The launch configuration settings. - model (Dict[str, Any]): The model configuration settings. - advanced (Dict[str, Any]): The advanced configuration settings. - queue (Dict[str, Any]): The queue configuration settings. - layout (Dict[str, Any]): The layout configuration settings. - """ - def __init__(self, config: Dict[str, Any]): - """Initializes a new instance of the ConfigLoader class. - - Args: - config (dict): The configuration dictionary. - """ - self.config = config - - def restore_defaults_for_keys(self, *args: str): - """Restores specified keys to their default values, including nested keys. - - Args: - *args (str): A list of keys or paths to keys (for nested dictionaries) to restore to default values. - Each key or path should be a list of keys leading to the desired key. - """ - default_config = self.get_default_config() - - for key in args: - self.apply_overrides(self.config, default_config, key) - - - - @classmethod - def load_config(cls, yaml_path: Optional[str] = None, **kwargs: Any) -> 'ConfigLoader': - """Load the configuration file and apply overrides. - - Args: - yaml_path (str, optional): Path to the YAML file containing overrides. - **kwargs: Additional overrides as keyword arguments. 
- - Returns: - ConfigLoader: A ConfigLoader object with the loaded configuration. - """ - - # Load the original configuration - config = cls.get_default_config() - - # Override with another YAML file if provided - - if yaml_path: - with open(yaml_path, 'r') as file: - override_config = yaml.safe_load(file) - cls.apply_overrides(config, override_config) - - # Apply overrides from kwargs - cls.apply_overrides(config, kwargs) - return cls(config) - - @staticmethod - def apply_overrides(orig_dict: Dict[str, Any], override_dict: Dict[str, Any], specific: Optional[str] = None): - """Recursively apply overrides to the configuration, only for specific keys. - - Args: - orig_dict (Dict[str, Any]): The original dictionary. - override_dict (Dict[str, Any]): The override dictionary. - specific (str, optional): The specific key to override. - """ - for key, value in override_dict.items(): - - if isinstance(value, dict): - # If the value is a dict, apply recursively - sub_dict = orig_dict.get(key, {}) - ConfigLoader.apply_overrides(sub_dict, value, specific) - orig_dict[key] = sub_dict - else: - # Apply override for this key - if specific is None: - # If no specific keys are provided, update the key - # If the value is not a dict, search for the key and update - if ConfigLoader.update_nested_key(orig_dict, key, value): - continue # Key was found and updated - orig_dict[key] = value # Key not found, update at this level - - elif key in specific: - # If specific keys are provided, only update if the key is in the list - if ConfigLoader.update_nested_key(orig_dict, specific, value): - continue # Key was found and updated - orig_dict[specific] = value - - @staticmethod - def update_nested_key(d, key, value): - """Recursively search and update the key in nested dictionary. - - Args: - d (Dict[str, Any]): The dictionary. - key (str): The key to update. - value (Any): The new value. - - Returns: - bool: True if the key was found and updated, False otherwise. 
- """ - - if key in d: - d[key] = value - return True - for k, v in d.items(): - if isinstance(v, dict) and ConfigLoader.update_nested_key(v, key, value): - return True - return False - - @staticmethod - def get_default_config(): - """Return the default configuration. - - Returns: - Dict[str, Any]: The default configuration. - """ - with open(gv.DEFAULT_APP_CONIFG_PATH , 'r') as file: - config = yaml.safe_load(file) - return config - - -class AppConfig(ConfigLoader): - """A class that extends ConfigLoader to manage application-specific configuration settings. - - This class provides methods for setting global variables, launch options, and layout options from the configuration. - - Attributes: - config (dict): The current configuration settings. - launch (dict): The launch configuration settings. - model (dict): The model configuration settings. - advanced (dict): The advanced configuration settings. - queue (dict): The queue configuration settings. - layout (dict): The layout configuration settings. - """ - def __init__(self, config : Dict[str, Any]): - """Initializes a new instance of the AppConfig class. - - Args: - config (dict): The configuration dictionary. - """ - self.config = config - - self.set_global_vars_from_config() - self.set_launch_options() - self.set_layout_options() - - self.launch = self.config.get("launch") - self.model = self.config.get("model") - self.advanced = self.config.get("advanced") - self.queue = self.config.get("queue") - self.layout = self.config.get("layout") - - def set_global_vars_from_config(self) -> None: - """Sets the global variables from a configuration dictionary. - - Args: - config (dict): A dictionary containing the parameters for the model. Modify the default parameters in the config.yml file. 
- - Returns: - None - """ - - gv.MODEL_PARAMS = self.config.get('model') - gv.TIMEOUT = self.config.get("advanced").get('timeout') - - def set_launch_options(self) -> None: - """Sets the launch options from a configuration dictionary. - - Args: - None - - Returns: - None - """ - launch_options = self.config.get("launch") - - if launch_options.get('auth').pop('auth_enabled'): - self.config['launch']['auth'] = (launch_options.get('auth').pop('auth_username'), - launch_options.get('auth').pop('auth_password')) - else: - self.config['launch']['auth'] = None - - def set_layout_options(self) -> None: - """Sets the layout options from a configuration dictionary. - - Args: - None - - Returns: - None - """ - self.config['layout']['header'] = self.check_and_set_path(self.config['layout'], 'header') - self.config['layout']['footer'] = self.check_and_set_path(self.config['layout'], 'footer') - self.config['layout']['logo'] = self.check_and_set_path(self.config['layout'], 'logo') - - def get_layout(self) -> Dict[str, str]: - """Gets the layout options from a configuration dictionary. - - Args: - None - - Returns: - dict: A dictionary containing the header and footer layout options. 
- """ - if not os.path.exists(self.config['layout']['header']) and \ - self.config['layout']['header'] == "scraibe/app/header.html": - - hname = os.path.join(CURRENT_PATH, "header.html") - - header = open(hname).read() - - elif not os.path.exists(self.config['layout']['header']) and self.config['layout']['header'] != "scraibe/app/header.html": - warnings.warn(f"Header file not found: {self.config['layout']['header']} \n" \ - "fall back to default.") - - hname = os.path.join(CURRENT_PATH, "header.html") - - header = open(hname).read() - elif os.path.exists(self.config['layout']['header']): - header = open(self.config['layout']['header']).read() - else: - warnings.warn(f"Header file not found: {self.config['layout']['header']}") - header = None - - - if header != None: - if self.config['layout']['logo'] == "scraibe/app/logo.svg": - header = header.replace("/file=logo.svg", f"/file={os.path.join(CURRENT_PATH, 'logo.svg')}") - elif self.config['layout']['logo'] != "scraibe/app/logo.svg": - header = header.replace("/file=logo.svg", f"/file={self.config['layout']['logo']}") - else: - warnings.warn(f"Logo file not found: {self.config['layout']['logo']}") - - - if self.config['layout']['footer'] != None: - if os.path.exists(self.config['layout']['footer']): - footer = open(self.config['layout']['footer']).read() - elif self.config['layout']['footer'] == None: - footer = None - else: - warnings.warn(f"Footer file not found: {self.config['layout']['footer']}") - else: - footer = None - return {'header' : header , - 'footer' : footer} - - @staticmethod - def check_and_set_path(config_item: dict, key: str) -> Optional[str]: - """Check if the file exists at the given path. If not, try with CURRENT_PATH. - Raise FileNotFoundError if the file still doesn't exist. - - Args: - config_item (dict): The configuration item. - key (str): The key to check in the configuration item. - - Returns: - str: The path to the file if it exists, None otherwise. 
- """ - _current_path = os.path.dirname(os.path.realpath(__file__)) # Define your CURRENT_PATH - - file_path = config_item.get(key) - if file_path is None: - return None - if not os.path.exists(file_path): - new_path = os.path.join(_current_path, file_path) - if not os.path.exists(new_path): - warnings.warn(f"{key.capitalize()} file not found: {config_item[key]} \n" \ - "fall back to default.") - else: - config_item[key] = new_path - - return config_item[key] \ No newline at end of file From 5e71667119c1f884bb00e3206293adfeae145bec Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 12 Feb 2024 11:43:59 +0100 Subject: [PATCH 37/42] removed app --- scraibe/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scraibe/__init__.py b/scraibe/__init__.py index 3fd77e8..233cd4f 100644 --- a/scraibe/__init__.py +++ b/scraibe/__init__.py @@ -7,8 +7,6 @@ from .diarisation import * from .version import get_version as _get_version from .misc import * -from .app import * - from .cli import * __version__ = _get_version() From 45ee0b00b4f6f7799fbe38f530abda556b8eba9f Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 12 Feb 2024 11:45:17 +0100 Subject: [PATCH 38/42] removed start server for now --- scraibe/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scraibe/cli.py b/scraibe/cli.py index 1c7f320..7cc7b1d 100644 --- a/scraibe/cli.py +++ b/scraibe/cli.py @@ -10,7 +10,7 @@ import json from .autotranscript import Scraibe from .misc import ParseKwargs -from .app.app import gradio_Interface + from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE from torch.cuda import is_available @@ -164,6 +164,7 @@ def cli(): else: # unfinished code + raise NotImplementedError("Currently not Working") import subprocess import sys From 6aa25dfec10e8d93427560ccc799c34ab1720b6b Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 12 Feb 2024 12:35:52 +0100 Subject: [PATCH 39/42] resolve merge conflict --- scraibe/misc.py | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/scraibe/misc.py b/scraibe/misc.py index ae9136e..992e40c 100644 --- a/scraibe/misc.py +++ b/scraibe/misc.py @@ -13,7 +13,9 @@ if CACHE_DIR != PYANNOTE_CACHE_DIR: WHISPER_DEFAULT_PATH = os.path.join(CACHE_DIR, "whisper") PYANNOTE_DEFAULT_PATH = os.path.join(CACHE_DIR, "pyannote") -PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") +PYANNOTE_DEFAULT_CONFIG = os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml") \ + if os.path.exists(os.path.join(PYANNOTE_DEFAULT_PATH, "config.yaml")) \ + else 'pyannote/speaker-diarization-3.1' def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) -> None: """Configure diarization pipeline from a YAML file. From 8af2294814bbc85659c2a8c78a66edf90a8b6267 Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Mon, 12 Feb 2024 12:40:51 +0100 Subject: [PATCH 40/42] avoid merge conflict --- scraibe/diarisation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scraibe/diarisation.py b/scraibe/diarisation.py index 570ac29..e598d30 100644 --- a/scraibe/diarisation.py +++ b/scraibe/diarisation.py @@ -212,9 +212,7 @@ class Diariser: if not os.path.exists(model) and use_auth_token is None: use_auth_token = cls._get_token() - model = 'pyannote/speaker-diarization' - elif not os.path.exists(model) and use_auth_token is not None: - model = 'pyannote/speaker-diarization' + elif os.path.exists(model) and not use_auth_token: # check if model can be found locally nearby the config file with open(model, 'r') as file: From 723dd7d0032c4455f1eba8eb54a7fecfaad12fe1 Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Mon, 12 Feb 2024 12:45:28 +0100 Subject: [PATCH 41/42] Delete test_multiprocessing.py removed for merge --- test_multiprocessing.py | 105 ---------------------------------------- 1 file changed, 105 deletions(-) delete mode 100644 test_multiprocessing.py diff --git a/test_multiprocessing.py b/test_multiprocessing.py deleted file mode 100644 
index ad9edc2..0000000 --- a/test_multiprocessing.py +++ /dev/null @@ -1,105 +0,0 @@ -import multiprocessing -import os -import threading -import queue -import time -import torch -from scraibe import Scraibe - -def input_thread(input_queue, processed_event): - while True: - processed_event.wait() # Wait for the previous input to be processed - processed_event.clear() # Clear the event for the next input - inp = input("Enter the path to the audio file ('q' to quit, 'reload' to reload model): ") - input_queue.put(inp) - -def clear_queue(queue): - while not queue.empty(): - try: - queue.get_nowait() - except queue.Empty: - continue - -def model_worker(request_queue, last_active_time, response_queue,loaded_event, running_event): - - loaded_event.set() - - model = Scraibe(dia_model="models/pyannote/config.yaml") - - while True: - audio_path = request_queue.get() - if audio_path == "STOP": - break - running_event.set() - transcription = model.autotranscribe(audio_path) - running_event.clear() - response_queue.put(transcription) - last_active_time.value = time.time() - - del model - torch.cuda.empty_cache() - clear_queue(request_queue) - clear_queue(response_queue) - loaded_event.clear() - - -def start_model_worker(request_queue, last_active_time, response_queue,loaded_event, running_event): - model_process = multiprocessing.Process(target=model_worker, args=(request_queue, last_active_time, response_queue,loaded_event, running_event)) - model_process.start() - return model_process - -def timer_thread(request_queue, last_active_time,loaded_event, running_event, timeout=30): - while True: - time.sleep(timeout) - - if time.time() - last_active_time.value > timeout and loaded_event.is_set() and not running_event.is_set(): - print(f"No activity for the last {timeout} seconds. 
Stopping the model worker.", flush=True) - request_queue.put("STOP") - -if __name__ == "__main__": - request_queue = multiprocessing.Queue() - response_queue = multiprocessing.Queue() - input_queue = queue.Queue() - last_active_time = multiprocessing.Value('d', time.time()) - loaded_event = multiprocessing.Event() - running_event = multiprocessing.Event() - - processed_event = multiprocessing.Event() - processed_event.set() # Initially set to allow the first input - - model_process = start_model_worker(request_queue, last_active_time, response_queue,loaded_event ,running_event) - timer = threading.Thread(target=timer_thread, args=(request_queue, last_active_time, loaded_event, running_event), daemon=True) - input_handler = threading.Thread(target=input_thread, args=(input_queue,processed_event)) - - timer.start() - input_handler.start() - - while True: - - audio_file_path = input_queue.get() # Get input from the input thread - print(audio_file_path) - - if audio_file_path.lower() == 'q': - request_queue.put("STOP") - model_process.join() - break - elif audio_file_path.lower() == 'reload': - if loaded_event.is_set(): - request_queue.put("STOP") - model_process.join() - model_process = start_model_worker(request_queue, last_active_time, response_queue, loaded_event, running_event) - print("Model reloaded.") - elif not os.path.exists(audio_file_path): - print("File does not exist.") - else: - if not loaded_event.is_set(): - model_process = start_model_worker(request_queue, last_active_time, response_queue, loaded_event, running_event) - request_queue.put(audio_file_path) - transcription = response_queue.get() - print(transcription) - - processed_event.set() # Signal that the input has been processed - - model_process.join() - timer.join() - input_handler.join() From a48829b7cdc17804b9434077475d7fa64d0ca77c Mon Sep 17 00:00:00 2001 From: Jacob Schmieder Date: Mon, 12 Feb 2024 12:46:06 +0100 Subject: [PATCH 42/42] Delete test_multithreading.py --- test_multithreading.py 
| 85 ------------------------------------------ 1 file changed, 85 deletions(-) delete mode 100644 test_multithreading.py diff --git a/test_multithreading.py b/test_multithreading.py deleted file mode 100644 index fb4e301..0000000 --- a/test_multithreading.py +++ /dev/null @@ -1,85 +0,0 @@ -import os -import time - -from scraibe import Scraibe - -import multiprocessing -import threading -import torch -import gc - -model = None -last_used = time.time() -transcribe_active = threading.Event() - -def transcribe_thread(audio): - - global model - transcribe_active.set() - print(model.autotranscribe(audio)) - transcribe_active.clear() - -def model_thread(): - global model, last_used - model = Scraibe(dia_model= "models/pyannote/config.yaml") - last_used = time.time() - -def interaction_thread(): - global model, model_runner - while True: - command = input("Enter a command ('q' to quit, 'reload' to reload model): ") - - if command.lower() == 'q': - break - elif command.lower() == 'reload': - print("Reloading model...", model) - if model is None: - transcribe_active.clear() #black magic - model_runner = threading.Thread(target=model_thread) - model_runner.start() - model_runner.join() - - else: - print("Model is already loaded.") - else: - if os.path.exists(command): - transcribe = threading.Thread(target=transcribe_thread, args=(command,)) - transcribe.start() - transcribe.join() - - else: - print("File does not exist.") - -def delete_unused_model(model_runner): - global model, last_used, transcribe_active - - while True: - _unload_porperty = (not transcribe_active.is_set() and (time.time() - last_used > 30) and model is not None) - if _unload_porperty: - - del model - model = None - - gc.collect() - torch.cuda.empty_cache() - - model_runner.join() - - print("Model deleted") - time.sleep(10) - -if __name__ == "__main__": - - lock = threading.Lock() - - interaction = threading.Thread(target=interaction_thread) - model_runner = threading.Thread(target=model_thread, 
daemon=True) - model_deleter = threading.Thread(target=delete_unused_model, args=(model_runner,), daemon=True) - - model_runner.start() - model_deleter.start() - - # Ensure the model is initialized before starting the interaction - model_runner.join() - interaction.start() - interaction.join() \ No newline at end of file