From c8b9a7aa9339fd41b22d640c3fe8e89ecb4d79f1 Mon Sep 17 00:00:00 2001
From: "Schmieder, Jacob" <jacob.schmieder@dbfz.de>
Date: Thu, 30 May 2024 14:18:33 +0000
Subject: [PATCH 1/4] removed gradio related stuff

---
 scraibe/cli.py | 110 ++++++++++++++++---------------------------------
 1 file changed, 35 insertions(+), 75 deletions(-)

diff --git a/scraibe/cli.py b/scraibe/cli.py
index b6f2c17..9dfe395 100644
--- a/scraibe/cli.py
+++ b/scraibe/cli.py
@@ -37,22 +37,9 @@ def cli():
 
     parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
 
-    group = parser.add_mutually_exclusive_group()
-
     parser.add_argument("-f", "--audio-files", nargs="+", type=str, default=None,
                         help="List of audio files to transcribe.")
 
-    group.add_argument('--start-server', action='store_true',
-                       help='Start the Gradio app.'
-                       'If set, all other arguments are ignored'
-                       'besides --server-config or --server-kwargs.')
-
-    parser.add_argument("--server-config", type=str, default=None,
-                        help="Path to the configy.yml file.")
-
-    parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={},
-                        help='Keyword arguments for the Gradio app.')
-
     parser.add_argument("--whisper-model-name", default="medium",
                         help="Name of the Whisper model to use.")
 
@@ -104,9 +91,6 @@ def cli():
 
     out_format = arg_dict.pop("output_format")
 
-    # seup server arg:
-    start_server = arg_dict.pop("start_server")
-
     task = arg_dict.pop("task")
 
     if args.num_threads > 0:
@@ -118,76 +102,52 @@ def cli():
 
     if arg_dict["whisper_model_directory"]:
         class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
+        
 
-    if not start_server:
+    model = Scraibe(**class_kwargs)
 
-        model = Scraibe(**class_kwargs)
+    if arg_dict["audio_files"]:
+        audio_files = arg_dict.pop("audio_files")
 
-        if arg_dict["audio_files"]:
-            audio_files = arg_dict.pop("audio_files")
+        if task == "autotranscribe" or task == "autotranscribe+translate":
+            for audio in audio_files:
+                if task == "autotranscribe+translate":
+                    task = "translate"
+                else:
+                    task = "transcribe"
 
-            if task == "autotranscribe" or task == "autotranscribe+translate":
-                for audio in audio_files:
-                    if task == "autotranscribe+translate":
-                        task = "translate"
-                    else:
-                        task = "transcribe"
+                out = model.autotranscribe(audio, task=task, language=arg_dict.pop(
+                    "language"), verbose=arg_dict.pop("verbose_output"))
+                basename = audio.split("/")[-1].split(".")[0]
+                print(f'Saving {basename}.{out_format} to {out_folder}')
+                out.save(os.path.join(
+                    out_folder, f"{basename}.{out_format}"))
 
-                    out = model.autotranscribe(audio, task=task, language=arg_dict.pop(
-                        "language"), verbose=arg_dict.pop("verbose_output"))
-                    basename = audio.split("/")[-1].split(".")[0]
-                    print(f'Saving {basename}.{out_format} to {out_folder}')
-                    out.save(os.path.join(
-                        out_folder, f"{basename}.{out_format}"))
+        elif task == "diarization":
+            for audio in audio_files:
+                if arg_dict.pop("verbose_output"):
+                    print("Verbose not implemented for diarization.")
 
-            elif task == "diarization":
-                for audio in audio_files:
-                    if arg_dict.pop("verbose_output"):
-                        print("Verbose not implemented for diarization.")
+                out = model.diarization(audio)
+                basename = audio.split("/")[-1].split(".")[0]
+                path = os.path.join(out_folder, f"{basename}.{out_format}")
 
-                    out = model.diarization(audio)
-                    basename = audio.split("/")[-1].split(".")[0]
-                    path = os.path.join(out_folder, f"{basename}.{out_format}")
+                print(f'Saving {basename}.{out_format} to {out_folder}')
 
-                    print(f'Saving {basename}.{out_format} to {out_folder}')
+                with open(path, "w") as f:
+                    json.dump(json.dumps(out, indent=1), f)
 
-                    with open(path, "w") as f:
-                        json.dump(json.dumps(out, indent=1), f)
+        elif task == "transcribe" or task == "translate":
 
-            elif task == "transcribe" or task == "translate":
+            for audio in audio_files:
 
-                for audio in audio_files:
-
-                    out = model.transcribe(audio, task=task,
-                                           language=arg_dict.pop("language"),
-                                           verbose=arg_dict.pop("verbose_output"))
-                    basename = audio.split("/")[-1].split(".")[0]
-                    path = os.path.join(out_folder, f"{basename}.{out_format}")
-                    with open(path, "w") as f:
-                        f.write(out)
-
-    else:  # unfinished code
-        raise NotImplementedError("Currently not Working")
-        import subprocess
-        import sys
-
-        execute_path = os.path.join(
-            os.path.dirname(__file__), "app/app_starter.py")
-
-        config = arg_dict.pop("server_config")
-        server_kwargs = arg_dict.pop("server_kwargs")
-
-        if not config:
-            subprocess.run([sys.executable, execute_path,
-                           f"--server-kwargs={server_kwargs}"])
-        elif not server_kwargs:
-            subprocess.run([sys.executable, execute_path,
-                           f"--server-config={config}"])
-        elif not config and not server_kwargs:
-            subprocess.run([sys.executable, execute_path])
-        else:
-            subprocess.run([sys.executable, execute_path,
-                           f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
+                out = model.transcribe(audio, task=task,
+                                        language=arg_dict.pop("language"),
+                                        verbose=arg_dict.pop("verbose_output"))
+                basename = audio.split("/")[-1].split(".")[0]
+                path = os.path.join(out_folder, f"{basename}.{out_format}")
+                with open(path, "w") as f:
+                    f.write(out)
 
 
 if __name__ == "__main__":

From 754d0e9b8474655538ff365d8355d70949ea6f2f Mon Sep 17 00:00:00 2001
From: "Schmieder, Jacob" <jacob.schmieder@dbfz.de>
Date: Thu, 30 May 2024 14:23:05 +0000
Subject: [PATCH 2/4] removed unused packages

---
 scraibe/cli.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/scraibe/cli.py b/scraibe/cli.py
index 9dfe395..c07f90f 100644
--- a/scraibe/cli.py
+++ b/scraibe/cli.py
@@ -5,17 +5,12 @@ The function includes arguments for specifying the audio files, model paths,
 output formats, and other options necessary for transcription.
 """
 import os
-from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 import json
-
-from .autotranscript import Scraibe
-from .misc import ParseKwargs
-
-
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
 from torch.cuda import is_available
 from torch import set_num_threads
-
+from .autotranscript import Scraibe
 
 def cli():
     """
@@ -149,6 +144,5 @@ def cli():
                 with open(path, "w") as f:
                     f.write(out)
 
-
 if __name__ == "__main__":
     cli()

From dee6907be54a8e97e76bda2aa880d9ceb139e8cf Mon Sep 17 00:00:00 2001
From: "Schmieder, Jacob" <jacob.schmieder@dbfz.de>
Date: Thu, 30 May 2024 14:23:40 +0000
Subject: [PATCH 3/4] removed comment

---
 scraibe/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scraibe/cli.py b/scraibe/cli.py
index c07f90f..eece1bb 100644
--- a/scraibe/cli.py
+++ b/scraibe/cli.py
@@ -65,7 +65,7 @@ def cli():
     parser.add_argument("--verbose-output", type=str2bool, default=True,
                         help="Enable or disable progress and debug messages.")
 
-    parser.add_argument("--task", type=str, default='autotranscribe',  # unifinished code
+    parser.add_argument("--task", type=str, default='autotranscribe',
                         choices=["autotranscribe", "diarization",
                                  "autotranscribe+translate", "translate", 'transcribe'],
                         help="Choose to perform transcription, diarization, or translation. \

From 5ec66effc2eba6939f0dd90e9cd2ab4d245358db Mon Sep 17 00:00:00 2001
From: "Schmieder, Jacob" <jacob.schmieder@dbfz.de>
Date: Thu, 30 May 2024 14:50:06 +0000
Subject: [PATCH 4/4] added whisper type to cli

---
 scraibe/cli.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scraibe/cli.py b/scraibe/cli.py
index eece1bb..ee40c8b 100644
--- a/scraibe/cli.py
+++ b/scraibe/cli.py
@@ -35,6 +35,10 @@ def cli():
     parser.add_argument("-f", "--audio-files", nargs="+", type=str, default=None,
                         help="List of audio files to transcribe.")
 
+    parser.add_argument("--whisper-type", type=str, default="whisper",
+                        choices=["whisper", "whisperx"],
+                        help="Type of Whisper model to use ('whisper' or 'whisperx').")
+    
     parser.add_argument("--whisper-model-name", default="medium",
                         help="Name of the Whisper model to use.")
 
@@ -92,8 +96,10 @@ def cli():
         set_num_threads(arg_dict.pop("num_threads"))
 
     class_kwargs = {'whisper_model': arg_dict.pop("whisper_model_name"),
+                    'whisper_type':arg_dict.pop("whisper_type"),
                     'dia_model': arg_dict.pop("diarization_directory"),
-                    'use_auth_token': arg_dict.pop("hf_token")}
+                    'use_auth_token': arg_dict.pop("hf_token"),
+                    }
 
     if arg_dict["whisper_model_directory"]:
         class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")