Auto fixes from PEP8, fixes from flake8.

This commit is contained in:
Marko Henning
2024-05-15 15:18:17 +02:00
parent 9f526a8f3b
commit 4bcd28d0ea
15 changed files with 391 additions and 417 deletions
+66 -58
View File
@@ -4,7 +4,7 @@ allowing for user interaction to transcribe and diarize audio files.
The function includes arguments for specifying the audio files, model paths,
output formats, and other options necessary for transcription.
"""
import os
import os
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import json
@@ -12,7 +12,7 @@ from .autotranscript import Scraibe
from .misc import ParseKwargs
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE
from torch.cuda import is_available
from torch import set_num_threads
@@ -26,42 +26,43 @@ def cli():
This function can be executed from the command line to perform transcription tasks, providing a
user-friendly way to access the Scraibe class functionalities.
"""
def str2bool(string):
    """Convert a command-line string into a boolean.

    Intended for use as an ``argparse`` ``type=`` callable. The literals
    ``"True"`` and ``"False"`` are accepted regardless of letter case and
    of surrounding whitespace (e.g. ``"true"``, ``" FALSE "``).

    Args:
        string: The raw argument value.

    Returns:
        ``True`` or ``False`` according to the given literal.

    Raises:
        ValueError: If the value is not a recognised boolean literal.
            ``argparse`` turns this into a regular usage error.
    """
    str2val = {"True": True, "False": False}
    # Normalise only real strings; anything else is looked up unchanged so
    # it ends up in the ValueError branch below instead of failing on
    # a missing ``.strip()`` attribute.
    key = string.strip().capitalize() if isinstance(string, str) else string
    try:
        return str2val[key]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs; report them uniformly.
        raise ValueError(
            f"Expected one of {set(str2val.keys())}, got {string}") from None
parser = ArgumentParser(formatter_class = ArgumentDefaultsHelpFormatter)
parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
group = parser.add_mutually_exclusive_group()
parser.add_argument("-f","--audio-files", nargs="+", type=str, default=None,
parser.add_argument("-f", "--audio-files", nargs="+", type=str, default=None,
help="List of audio files to transcribe.")
group.add_argument('--start-server', action='store_true',
help='Start the Gradio app.' \
'If set, all other arguments are ignored' \
'besides --server-config or --server-kwargs.')
parser.add_argument("--server-config", type=str, default= None,
help='Start the Gradio app.'
'If set, all other arguments are ignored'
'besides --server-config or --server-kwargs.')
parser.add_argument("--server-config", type=str, default=None,
help="Path to the configy.yml file.")
parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={},
help='Keyword arguments for the Gradio app.')
parser.add_argument("--whisper-model-name", default="medium",
help="Name of the Whisper model to use.")
parser.add_argument("--whisper-model-directory", type=str, default= None,
parser.add_argument("--whisper-model-directory", type=str, default=None,
help="Path to save Whisper model files; defaults to ./models/whisper.")
parser.add_argument("--diarization-directory", type=str, default= None,
parser.add_argument("--diarization-directory", type=str, default=None,
help="Path to the diarization model directory.")
parser.add_argument("--hf-token", default= None, type=str,
parser.add_argument("--hf-token", default=None, type=str,
help="HuggingFace token for private model download.")
parser.add_argument("--inference-device",
@@ -82,105 +83,112 @@ def cli():
parser.add_argument("--verbose-output", type=str2bool, default=True,
help="Enable or disable progress and debug messages.")
parser.add_argument("--task", type=str, default= 'autotranscribe', # unifinished code
parser.add_argument("--task", type=str, default='autotranscribe', # unifinished code
choices=["autotranscribe", "diarization",
"autotranscribe+translate", "translate", 'transcribe'],
help="Choose to perform transcription, diarization, or translation. \
If set to translate, the output will be translated to English.")
parser.add_argument("--language", type=str, default=None,
choices=sorted(LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
choices=sorted(
LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
help="Language spoken in the audio. Specify None to perform language detection.")
args = parser.parse_args()
arg_dict = vars(args)
# configure output
out_folder = arg_dict.pop("output_directory")
os.makedirs(out_folder, exist_ok=True)
out_format = arg_dict.pop("output_format")
# setup server arg:
# setup server arg:
start_server = arg_dict.pop("start_server")
task = arg_dict.pop("task")
if args.num_threads > 0:
set_num_threads(arg_dict.pop("num_threads"))
class_kwargs = {'whisper_model' : arg_dict.pop("whisper_model_name"),
class_kwargs = {'whisper_model': arg_dict.pop("whisper_model_name"),
'dia_model': arg_dict.pop("diarization_directory"),
'use_auth_token' : arg_dict.pop("hf_token")}
'use_auth_token': arg_dict.pop("hf_token")}
if arg_dict["whisper_model_directory"]:
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
if not start_server:
model = Scraibe(**class_kwargs)
if arg_dict["audio_files"]:
audio_files = arg_dict.pop("audio_files")
if task == "autotranscribe" or task == "autotranscribe+translate":
for audio in audio_files:
if task == "autotranscribe+translate":
task = "translate"
else:
task = "transcribe"
out = model.autotranscribe(audio,task = task, language=arg_dict.pop("language"), verbose = arg_dict.pop("verbose_output"))
out = model.autotranscribe(audio, task=task, language=arg_dict.pop(
"language"), verbose=arg_dict.pop("verbose_output"))
basename = audio.split("/")[-1].split(".")[0]
print(f'Saving {basename}.{out_format} to {out_folder}')
out.save(os.path.join(out_folder, f"{basename}.{out_format}"))
out.save(os.path.join(
out_folder, f"{basename}.{out_format}"))
elif task == "diarization":
for audio in audio_files:
if arg_dict.pop("verbose_output"):
print(f"Verbose not implemented for diarization.")
print("Verbose not implemented for diarization.")
out = model.diarization(audio)
basename = audio.split("/")[-1].split(".")[0]
path = os.path.join(out_folder, f"{basename}.{out_format}")
print(f'Saving {basename}.{out_format} to {out_folder}')
with open(path, "w") as f:
json.dump(json.dumps(out, indent= 1), f)
json.dump(json.dumps(out, indent=1), f)
elif task == "transcribe" or task == "translate":
for audio in audio_files:
out = model.transcribe(audio, task = task,
language= arg_dict.pop("language"),
verbose = arg_dict.pop("verbose_output"))
out = model.transcribe(audio, task=task,
language=arg_dict.pop("language"),
verbose=arg_dict.pop("verbose_output"))
basename = audio.split("/")[-1].split(".")[0]
path = os.path.join(out_folder, f"{basename}.{out_format}")
with open(path, "w") as f:
f.write(out)
else: # unfinished code
f.write(out)
else: # unfinished code
raise NotImplementedError("Currently not Working")
import subprocess
import sys
execute_path = os.path.join(os.path.dirname(__file__), "app/app_starter.py")
execute_path = os.path.join(
os.path.dirname(__file__), "app/app_starter.py")
config = arg_dict.pop("server_config")
server_kwargs = arg_dict.pop("server_kwargs")
if not config:
subprocess.run([sys.executable, execute_path, f"--server-kwargs={server_kwargs}"])
subprocess.run([sys.executable, execute_path,
f"--server-kwargs={server_kwargs}"])
elif not server_kwargs:
subprocess.run([sys.executable, execute_path, f"--server-config={config}"])
subprocess.run([sys.executable, execute_path,
f"--server-config={config}"])
elif not config and not server_kwargs:
subprocess.run([sys.executable, execute_path])
else:
subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
subprocess.run([sys.executable, execute_path,
f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
# Script entry point: invoke the command-line interface when this module is
# executed directly rather than imported.
# NOTE(review): the two `cli()` lines below look like the before/after sides
# of the diff (likely a newline-at-end-of-file fix) — confirm only one call
# exists in the actual file.
if __name__ == "__main__":
cli()
cli()