From bc1dd9d541ec89558c19969e79694995479e9e9c Mon Sep 17 00:00:00 2001 From: Jaikinator Date: Fri, 22 Sep 2023 12:42:58 +0200 Subject: [PATCH] added support for multiple file uploads --- scraibe/app/gradio_app.py | 141 +++++++++++++++++++++++++++++++------- 1 file changed, 117 insertions(+), 24 deletions(-) diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py index 6f09506..6d70097 100644 --- a/scraibe/app/gradio_app.py +++ b/scraibe/app/gradio_app.py @@ -35,8 +35,9 @@ Usage: import json import gradio as gr -from scraibe import AutoTranscribe, Transcript +from tqdm import tqdm +from scraibe import AutoTranscribe, Transcript theme = gr.themes.Soft( primary_hue="green", @@ -89,13 +90,43 @@ class GradioTranscriptionInterface: "language": language if language != "None" else None, "task": 'translate' if translation else None } + if isinstance(source, str): + try: + result = self.model.autotranscribe(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") + + return str(result), result.get_json() - try: - result = self.model.autotranscribe(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - return str(result), result.get_json() + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + try: + res = self.model.autotranscribe(s, **kwargs) + except ValueError: + _name = s.split("/")[-1] + res = f"NO TRANSCRIPT FOUND FOR {_name}" + gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.") + result.append(res) + + out = '' + out_dict = {} + for i, r in enumerate(result): + out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n" + out += str(r) + out += "\n\n" + + if isinstance(r, str): + out_dict[source_names[i]] = r + else: + out_dict[source_names[i]] = r.get_dict() + + return out, json.dumps(out_dict, indent=4) + + else: + raise gr.Error("Please provide a valid audio file.") def transcribe(self, source, translation, language): @@ -110,8 +141,28 @@ class GradioTranscriptionInterface: "task": 'translate' if translation == "Yes" else None } - result = self.model.transcribe(source, **kwargs) - return str(result) + if isinstance(source, str): + result = self.model.transcribe(source, **kwargs) + + return str(result) + + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Transcribing audio files"): + res = self.model.transcribe(s, **kwargs) + result.append(res) + + out = '' + for i, res in enumerate(result): + out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n" + out += str(res) + out += "\n\n" + + return out + + else: + raise gr.Error("Please provide a valid audio file.") def perform_diarisation(self, source, num_speakers): """ @@ -124,13 +175,35 @@ class GradioTranscriptionInterface: "num_speakers": num_speakers if num_speakers != 0 else None, } + if isinstance(source, str): + try: + result = self.model.diarization(source, **kwargs) + except ValueError: + raise gr.Error("Couldn't detect any speech in the provided audio. \ + Please try again!") - try: - result = self.model.diarization(source, **kwargs) - except ValueError: - raise gr.Error("Couldn't detect any speech in the provided audio. \ - Please try again!") - return json.dumps(result, indent=2) + return json.dumps(result, indent=2) + elif isinstance(source, list): + source_names = [s.split("/")[-1] for s in source] + result = [] + for s in tqdm(source, total=len(source),desc = "Performing diarisation"): + try: + res = self.model.diarization(s, **kwargs) + except ValueError: + res = f"NO DIARISATION FOUND FOR {s}" + gr.Warning(f"Couldn't detect any speech in {s} will skip this file.") + result.append(res) + + out = {} + + for i, res in enumerate(result): + out[source_names[i]] = res + + return json.dumps(out, indent=4) + + else: + gr.Error("Please provide a valid audio file.") + #### # Gradio Interface @@ -197,7 +270,7 @@ def gradio_Interface(model : AutoTranscribe = None): gr.update(visible = True), gr.update(visible = False, value = None)) - elif choice == "File": + elif choice == "File or Files": return (gr.update(visible = False, value = None), gr.update(visible = False, value = None), @@ -205,22 +278,42 @@ def gradio_Interface(model : AutoTranscribe = None): gr.update(visible = False, value = None), gr.update(visible = True)) - def run_scribe(task, num_speakers, translate, language, audio1, audio2, video1, video2, file_in, progress = gr.Progress(track_tqdm= True)): + def run_scribe(task, + num_speakers, + translate, + language, + audio1, + audio2, + video1, + video2, + file_in, + progress = gr.Progress(track_tqdm= True)): # get *args which are not None progress(0, desc='Starting task...') source = audio1 or audio2 or video1 or video2 or file_in + if isinstance(source, list): + source = [s.name for s in source] + if len(source) == 1: + source = source[0] + if task == 'Auto Transcribe': - + out_str , out_json = pipe.auto_transcribe(source = source, num_speakers = num_speakers, translation = translate, language = language) - return (gr.update(value = out_str, visible = True), - gr.update(value = out_json, visible = True), - gr.update(visible = True), - gr.update(visible = True)) + if isinstance(source, str): + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = True), + gr.update(visible = True)) + else: + return (gr.update(value = out_str, visible = True), + gr.update(value = out_json, visible = True), + gr.update(visible = False), + gr.update(visible = False)) elif task == 'Transcribe': @@ -279,7 +372,7 @@ def gradio_Interface(model : AutoTranscribe = None): leave it at None.", visible= True) input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" - ,"File"], label="Input Type", value="Upload Audio") + ,"File or Files"], label="Input Type", value="Upload Audio") audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", interactive= True, visible= True) @@ -289,7 +382,7 @@ def gradio_Interface(model : AutoTranscribe = None): interactive= True, visible= False) video2 = gr.Video(source="webcam", label="Record Video", type="filepath", interactive= True, visible= False) - file_in = gr.File(label="Upload File", interactive= True, visible= False) + file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False) submit = gr.Button()