Auto fixes from PEP8, fixes from flake8.

2024-05-15 15:18:17 +02:00
parent 9f526a8f3b
commit 4bcd28d0ea
15 changed files with 391 additions and 417 deletions
@@ -4,7 +4,7 @@ allowing for user interaction to transcribe and diarize audio files.
 The function includes arguments for specifying the audio files, model paths,
 output formats, and other options necessary for transcription.
 """
-import os 
+import os
 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 import json

@@ -12,7 +12,7 @@ from .autotranscript import Scraibe
 from .misc import ParseKwargs


-from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
+from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
 from torch.cuda import is_available
 from torch import set_num_threads

@@ -26,42 +26,43 @@ def cli():
    This function can be executed from the command line to perform transcription tasks, providing a 
    user-friendly way to access the Scraibe class functionalities.
    """
- 
+
    def str2bool(string):
        str2val = {"True": True, "False": False}
        if string in str2val:
            return str2val[string]
        else:
-            raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
+            raise ValueError(
+                f"Expected one of {set(str2val.keys())}, got {string}")

-    parser = ArgumentParser(formatter_class = ArgumentDefaultsHelpFormatter)
+    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

    group = parser.add_mutually_exclusive_group()
-    
-    parser.add_argument("-f","--audio-files", nargs="+", type=str, default=None,
+
+    parser.add_argument("-f", "--audio-files", nargs="+", type=str, default=None,
                        help="List of audio files to transcribe.")
-    
+
    group.add_argument('--start-server', action='store_true',
-                        help='Start the Gradio app.' \
-                        'If set, all other arguments are ignored' \
-                        'besides --server-config or --server-kwargs.')
-    
-    parser.add_argument("--server-config", type=str, default= None,
+                       help='Start the Gradio app.'
+                       'If set, all other arguments are ignored'
+                       'besides --server-config or --server-kwargs.')
+
+    parser.add_argument("--server-config", type=str, default=None,
                        help="Path to the configy.yml file.")
-    
+
    parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={},
                        help='Keyword arguments for the Gradio app.')
-                        
+
    parser.add_argument("--whisper-model-name", default="medium",
                        help="Name of the Whisper model to use.")

-    parser.add_argument("--whisper-model-directory", type=str, default= None,
+    parser.add_argument("--whisper-model-directory", type=str, default=None,
                        help="Path to save Whisper model files; defaults to ./models/whisper.")

-    parser.add_argument("--diarization-directory", type=str, default= None,
+    parser.add_argument("--diarization-directory", type=str, default=None,
                        help="Path to the diarization model directory.")

-    parser.add_argument("--hf-token", default= None, type=str,
+    parser.add_argument("--hf-token", default=None, type=str,
                        help="HuggingFace token for private model download.")

    parser.add_argument("--inference-device",
@@ -82,105 +83,112 @@ def cli():
    parser.add_argument("--verbose-output", type=str2bool, default=True,
                        help="Enable or disable progress and debug messages.")

-    parser.add_argument("--task", type=str, default= 'autotranscribe', # unifinished code
+    parser.add_argument("--task", type=str, default='autotranscribe',  # unfinished code
                        choices=["autotranscribe", "diarization",
                                 "autotranscribe+translate", "translate", 'transcribe'],
                        help="Choose to perform transcription, diarization, or translation. \
                        If set to translate, the output will be translated to English.")

    parser.add_argument("--language", type=str, default=None,
-                        choices=sorted(LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
+                        choices=sorted(
+                            LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
                        help="Language spoken in the audio. Specify None to perform language detection.")

    args = parser.parse_args()
-    
+
    arg_dict = vars(args)
-    
+
    # configure output
    out_folder = arg_dict.pop("output_directory")
    os.makedirs(out_folder, exist_ok=True)

    out_format = arg_dict.pop("output_format")
-    
-    # seup server arg: 
+
+    # setup server arg:
    start_server = arg_dict.pop("start_server")
-    
+
    task = arg_dict.pop("task")
-    
+
    if args.num_threads > 0:
        set_num_threads(arg_dict.pop("num_threads"))
-    
-    class_kwargs = {'whisper_model' : arg_dict.pop("whisper_model_name"),
+
+    class_kwargs = {'whisper_model': arg_dict.pop("whisper_model_name"),
                    'dia_model': arg_dict.pop("diarization_directory"),
-                    'use_auth_token' : arg_dict.pop("hf_token")}
-    
+                    'use_auth_token': arg_dict.pop("hf_token")}
+
    if arg_dict["whisper_model_directory"]:
        class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")

    if not start_server:
-        
+
        model = Scraibe(**class_kwargs)

        if arg_dict["audio_files"]:
            audio_files = arg_dict.pop("audio_files")
-            
+
            if task == "autotranscribe" or task == "autotranscribe+translate":
                for audio in audio_files:
                    if task == "autotranscribe+translate":
                        task = "translate"
                    else:
                        task = "transcribe"
-                        
-                    out = model.autotranscribe(audio,task = task, language=arg_dict.pop("language"), verbose = arg_dict.pop("verbose_output"))
+
+                    out = model.autotranscribe(audio, task=task, language=arg_dict.pop(
+                        "language"), verbose=arg_dict.pop("verbose_output"))
                    basename = audio.split("/")[-1].split(".")[0]
                    print(f'Saving {basename}.{out_format} to {out_folder}')
-                    out.save(os.path.join(out_folder, f"{basename}.{out_format}"))
-                    
+                    out.save(os.path.join(
+                        out_folder, f"{basename}.{out_format}"))
+
            elif task == "diarization":
                for audio in audio_files:
                    if arg_dict.pop("verbose_output"):
-                        print(f"Verbose not implemented for diarization.")
-                        
+                        print("Verbose not implemented for diarization.")
+
                    out = model.diarization(audio)
                    basename = audio.split("/")[-1].split(".")[0]
                    path = os.path.join(out_folder, f"{basename}.{out_format}")
-                    
+
                    print(f'Saving {basename}.{out_format} to {out_folder}')
-                    
+
                    with open(path, "w") as f:
-                        json.dump(json.dumps(out, indent= 1), f)
+                        json.dump(json.dumps(out, indent=1), f)

            elif task == "transcribe" or task == "translate":
-                
+
                for audio in audio_files:
-    
-                    out = model.transcribe(audio, task = task,
-                                        language= arg_dict.pop("language"),
-                                        verbose = arg_dict.pop("verbose_output"))
+
+                    out = model.transcribe(audio, task=task,
+                                           language=arg_dict.pop("language"),
+                                           verbose=arg_dict.pop("verbose_output"))
                    basename = audio.split("/")[-1].split(".")[0]
                    path = os.path.join(out_folder, f"{basename}.{out_format}")
                    with open(path, "w") as f:
-                        f.write(out)  
-                    
-    
-    else: # unfinished code
+                        f.write(out)
+
+    else:  # unfinished code
        raise NotImplementedError("Currently not Working")
        import subprocess
        import sys
-        
-        execute_path = os.path.join(os.path.dirname(__file__), "app/app_starter.py")
-        
+
+        execute_path = os.path.join(
+            os.path.dirname(__file__), "app/app_starter.py")
+
        config = arg_dict.pop("server_config")
        server_kwargs = arg_dict.pop("server_kwargs")
-        
+
        if not config:
-            subprocess.run([sys.executable, execute_path, f"--server-kwargs={server_kwargs}"])
+            subprocess.run([sys.executable, execute_path,
+                           f"--server-kwargs={server_kwargs}"])
        elif not server_kwargs:
-            subprocess.run([sys.executable, execute_path, f"--server-config={config}"])
+            subprocess.run([sys.executable, execute_path,
+                           f"--server-config={config}"])
        elif not config and not server_kwargs:
            subprocess.run([sys.executable, execute_path])
        else:
-            subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
+            subprocess.run([sys.executable, execute_path,
+                           f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
+

 if __name__ == "__main__":
-    cli()
+    cli()