From bc1dd9d541ec89558c19969e79694995479e9e9c Mon Sep 17 00:00:00 2001
From: Jaikinator <schmieder.jacob@web.de>
Date: Fri, 22 Sep 2023 12:42:58 +0200
Subject: [PATCH] added support for multiple file uploads

---
 scraibe/app/gradio_app.py | 141 +++++++++++++++++++++++++++++++-------
 1 file changed, 117 insertions(+), 24 deletions(-)

diff --git a/scraibe/app/gradio_app.py b/scraibe/app/gradio_app.py
index 6f09506..6d70097 100644
--- a/scraibe/app/gradio_app.py
+++ b/scraibe/app/gradio_app.py
@@ -35,8 +35,9 @@ Usage:
 import json
 
 import gradio as gr
-from scraibe import AutoTranscribe, Transcript
+from tqdm import tqdm
 
+from scraibe import AutoTranscribe, Transcript
 
 theme = gr.themes.Soft(
     primary_hue="green",
@@ -89,13 +90,43 @@ class GradioTranscriptionInterface:
             "language": language if language != "None" else None,
             "task": 'translate' if translation else None
         }
+        if isinstance(source, str):
+            try:
+                result = self.model.autotranscribe(source, **kwargs)
+            except ValueError:
+                raise gr.Error("Couldn't detect any speech in the provided audio. \
+                        Please try again!")
+            
+            return str(result), result.get_json()
         
-        try:
-            result = self.model.autotranscribe(source, **kwargs)
-        except ValueError:
-            raise gr.Error("Couldn't detect any speech in the provided audio. \
-                    Please try again!")
-        return str(result), result.get_json()
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
+                try:
+                    res = self.model.autotranscribe(s, **kwargs)
+                except ValueError:
+                    _name = s.split("/")[-1]
+                    res = f"NO TRANSCRIPT FOUND FOR {_name}"
+                    gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
+                result.append(res)
+            
+            out = ''
+            out_dict = {}
+            for i, r in enumerate(result):
+                out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n"
+                out += str(r)
+                out += "\n\n"
+                
+                if isinstance(r, str):
+                    out_dict[source_names[i]] = r
+                else:
+                    out_dict[source_names[i]] = r.get_dict()
+              
+            return out, json.dumps(out_dict, indent=4)
+        
+        else:
+            raise gr.Error("Please provide a valid audio file.")
 
 
     def transcribe(self, source, translation, language):
@@ -110,8 +141,28 @@ class GradioTranscriptionInterface:
             "task": 'translate' if translation == "Yes" else None
         }
         
-        result = self.model.transcribe(source, **kwargs)
-        return str(result)
+        if isinstance(source, str):
+            result = self.model.transcribe(source, **kwargs)
+            
+            return str(result)
+        
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
+                res = self.model.transcribe(s, **kwargs)
+                result.append(res)
+            
+            out = ''
+            for i, res in enumerate(result):
+                out += f"TRANSCRIPT {i} FOR ({source_names[i]}):\n\n"
+                out += str(res)
+                out += "\n\n"
+            
+            return out
+        
+        else:
+            raise gr.Error("Please provide a valid audio file.")
 
     def perform_diarisation(self, source, num_speakers):
         """
@@ -124,13 +175,35 @@ class GradioTranscriptionInterface:
             "num_speakers": num_speakers if num_speakers != 0 else None,
         }
         
+        if isinstance(source, str):
+            try:
+                result = self.model.diarization(source, **kwargs)
+            except ValueError:
+                raise gr.Error("Couldn't detect any speech in the provided audio. \
+                        Please try again!")
         
-        try:
-            result = self.model.diarization(source, **kwargs)
-        except ValueError:
-            raise gr.Error("Couldn't detect any speech in the provided audio. \
-                    Please try again!")
-        return json.dumps(result, indent=2)
+            return json.dumps(result, indent=2)
+        elif isinstance(source, list):
+            source_names = [s.split("/")[-1] for s in source]
+            result = []
+            for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
+                try:
+                    res = self.model.diarization(s, **kwargs)
+                except ValueError:
+                    res = f"NO DIARISATION FOUND FOR {s}"
+                    gr.Warning(f"Couldn't detect any speech in {s} will skip this file.")
+                result.append(res)
+            
+            out = {}
+            
+            for i, res in enumerate(result):
+                out[source_names[i]] = res
+                
+            return json.dumps(out, indent=4)
+        
+        else:
+            raise gr.Error("Please provide a valid audio file.")  # must be raised: a bare gr.Error(...) is discarded and the method would return None
+            
 
 ####
 # Gradio Interface
@@ -197,7 +270,7 @@ def gradio_Interface(model : AutoTranscribe = None):
                     gr.update(visible = True),
                     gr.update(visible = False, value = None))
             
-        elif choice == "File":
+        elif choice == "File or Files":
             
             return (gr.update(visible = False, value = None),
                     gr.update(visible = False, value = None),
@@ -205,22 +278,42 @@ def gradio_Interface(model : AutoTranscribe = None):
                     gr.update(visible = False, value = None),
                     gr.update(visible = True))
 
-    def run_scribe(task, num_speakers, translate, language, audio1, audio2, video1, video2, file_in, progress = gr.Progress(track_tqdm= True)):
+    def run_scribe(task,
+                   num_speakers,
+                   translate,
+                   language,
+                   audio1,
+                   audio2,
+                   video1,
+                   video2,
+                   file_in,
+                   progress = gr.Progress(track_tqdm= True)):
         # get *args which are not None
         progress(0, desc='Starting task...')
         source = audio1 or audio2 or video1 or video2 or file_in
         
+        if isinstance(source, list):
+            source = [s.name for s in source]
+            if len(source) == 1:
+                source = source[0]
+ 
         if task == 'Auto Transcribe':
-            
+    
             out_str , out_json = pipe.auto_transcribe(source = source,
                                 num_speakers = num_speakers,
                                 translation = translate,
                                 language = language)
             
-            return (gr.update(value = out_str, visible = True),
-                    gr.update(value = out_json, visible = True),
-                    gr.update(visible = True),
-                    gr.update(visible = True))        
+            if isinstance(source, str):
+                return (gr.update(value = out_str, visible = True),
+                        gr.update(value = out_json, visible = True),
+                        gr.update(visible = True),
+                        gr.update(visible = True))      
+            else:
+                return (gr.update(value = out_str, visible = True),
+                        gr.update(value = out_json, visible = True),
+                        gr.update(visible = False),
+                        gr.update(visible = False))  
             
         elif task == 'Transcribe':
             
@@ -279,7 +372,7 @@ def gradio_Interface(model : AutoTranscribe = None):
                                     leave it at None.", visible= True)
                 
                 input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video" 
-                                    ,"File"], label="Input Type", value="Upload Audio")
+                                    ,"File or Files"], label="Input Type", value="Upload Audio")
                 
                 audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
                                     interactive= True, visible= True)
@@ -289,7 +382,7 @@ def gradio_Interface(model : AutoTranscribe = None):
                                     interactive= True, visible= False)
                 video2 = gr.Video(source="webcam", label="Record Video", type="filepath",
                                     interactive= True, visible= False)
-                file_in = gr.File(label="Upload File", interactive= True, visible= False)
+                file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
                 
                 submit = gr.Button()