Added support for multiple file uploads

This commit is contained in:
Jaikinator
2023-09-22 12:42:58 +02:00
parent 8019ce43b0
commit bc1dd9d541
+117 -24
View File
@@ -35,8 +35,9 @@ Usage:
import json import json
import gradio as gr import gradio as gr
from scraibe import AutoTranscribe, Transcript from tqdm import tqdm
from scraibe import AutoTranscribe, Transcript
theme = gr.themes.Soft( theme = gr.themes.Soft(
primary_hue="green", primary_hue="green",
@@ -89,13 +90,43 @@ class GradioTranscriptionInterface:
"language": language if language != "None" else None, "language": language if language != "None" else None,
"task": 'translate' if translation else None "task": 'translate' if translation else None
} }
if isinstance(source, str):
try:
result = self.model.autotranscribe(source, **kwargs)
except ValueError:
raise gr.Error("Couldn't detect any speech in the provided audio. \
Please try again!")
return str(result), result.get_json()
try: elif isinstance(source, list):
result = self.model.autotranscribe(source, **kwargs) source_names = [s.split("/")[-1] for s in source]
except ValueError: result = []
raise gr.Error("Couldn't detect any speech in the provided audio. \ for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
Please try again!") try:
return str(result), result.get_json() res = self.model.autotranscribe(s, **kwargs)
except ValueError:
_name = s.split("/")[-1]
res = f"NO TRANSCRIPT FOUND FOR {_name}"
gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
result.append(res)
out = ''
out_dict = {}
for i, r in enumerate(result):
out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n"
out += str(r)
out += "\n\n"
if isinstance(r, str):
out_dict[source_names[i]] = r
else:
out_dict[source_names[i]] = r.get_dict()
return out, json.dumps(out_dict, indent=4)
else:
raise gr.Error("Please provide a valid audio file.")
def transcribe(self, source, translation, language): def transcribe(self, source, translation, language):
@@ -110,8 +141,28 @@ class GradioTranscriptionInterface:
"task": 'translate' if translation == "Yes" else None "task": 'translate' if translation == "Yes" else None
} }
result = self.model.transcribe(source, **kwargs) if isinstance(source, str):
return str(result) result = self.model.transcribe(source, **kwargs)
return str(result)
elif isinstance(source, list):
source_names = [s.split("/")[-1] for s in source]
result = []
for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
res = self.model.transcribe(s, **kwargs)
result.append(res)
out = ''
for i, res in enumerate(result):
out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n"
out += str(res)
out += "\n\n"
return out
else:
raise gr.Error("Please provide a valid audio file.")
def perform_diarisation(self, source, num_speakers): def perform_diarisation(self, source, num_speakers):
""" """
@@ -124,13 +175,35 @@ class GradioTranscriptionInterface:
"num_speakers": num_speakers if num_speakers != 0 else None, "num_speakers": num_speakers if num_speakers != 0 else None,
} }
if isinstance(source, str):
try:
result = self.model.diarization(source, **kwargs)
except ValueError:
raise gr.Error("Couldn't detect any speech in the provided audio. \
Please try again!")
try: return json.dumps(result, indent=2)
result = self.model.diarization(source, **kwargs) elif isinstance(source, list):
except ValueError: source_names = [s.split("/")[-1] for s in source]
raise gr.Error("Couldn't detect any speech in the provided audio. \ result = []
Please try again!") for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
return json.dumps(result, indent=2) try:
res = self.model.diarization(s, **kwargs)
except ValueError:
res = f"NO DIARISATION FOUND FOR {s}"
gr.Warning(f"Couldn't detect any speech in {s} will skip this file.")
result.append(res)
out = {}
for i, res in enumerate(result):
out[source_names[i]] = res
return json.dumps(out, indent=4)
else:
gr.Error("Please provide a valid audio file.")
#### ####
# Gradio Interface # Gradio Interface
@@ -197,7 +270,7 @@ def gradio_Interface(model : AutoTranscribe = None):
gr.update(visible = True), gr.update(visible = True),
gr.update(visible = False, value = None)) gr.update(visible = False, value = None))
elif choice == "File": elif choice == "File or Files":
return (gr.update(visible = False, value = None), return (gr.update(visible = False, value = None),
gr.update(visible = False, value = None), gr.update(visible = False, value = None),
@@ -205,22 +278,42 @@ def gradio_Interface(model : AutoTranscribe = None):
gr.update(visible = False, value = None), gr.update(visible = False, value = None),
gr.update(visible = True)) gr.update(visible = True))
def run_scribe(task, num_speakers, translate, language, audio1, audio2, video1, video2, file_in, progress = gr.Progress(track_tqdm= True)): def run_scribe(task,
num_speakers,
translate,
language,
audio1,
audio2,
video1,
video2,
file_in,
progress = gr.Progress(track_tqdm= True)):
# get *args which are not None # get *args which are not None
progress(0, desc='Starting task...') progress(0, desc='Starting task...')
source = audio1 or audio2 or video1 or video2 or file_in source = audio1 or audio2 or video1 or video2 or file_in
if isinstance(source, list):
source = [s.name for s in source]
if len(source) == 1:
source = source[0]
if task == 'Auto Transcribe': if task == 'Auto Transcribe':
out_str , out_json = pipe.auto_transcribe(source = source, out_str , out_json = pipe.auto_transcribe(source = source,
num_speakers = num_speakers, num_speakers = num_speakers,
translation = translate, translation = translate,
language = language) language = language)
return (gr.update(value = out_str, visible = True), if isinstance(source, str):
gr.update(value = out_json, visible = True), return (gr.update(value = out_str, visible = True),
gr.update(visible = True), gr.update(value = out_json, visible = True),
gr.update(visible = True)) gr.update(visible = True),
gr.update(visible = True))
else:
return (gr.update(value = out_str, visible = True),
gr.update(value = out_json, visible = True),
gr.update(visible = False),
gr.update(visible = False))
elif task == 'Transcribe': elif task == 'Transcribe':
@@ -279,7 +372,7 @@ def gradio_Interface(model : AutoTranscribe = None):
leave it at None.", visible= True) leave it at None.", visible= True)
input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video"
,"File"], label="Input Type", value="Upload Audio") ,"File or Files"], label="Input Type", value="Upload Audio")
audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio", audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
interactive= True, visible= True) interactive= True, visible= True)
@@ -289,7 +382,7 @@ def gradio_Interface(model : AutoTranscribe = None):
interactive= True, visible= False) interactive= True, visible= False)
video2 = gr.Video(source="webcam", label="Record Video", type="filepath", video2 = gr.Video(source="webcam", label="Record Video", type="filepath",
interactive= True, visible= False) interactive= True, visible= False)
file_in = gr.File(label="Upload File", interactive= True, visible= False) file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
submit = gr.Button() submit = gr.Button()