@@ -10,4 +10,6 @@ from .misc import *
|
|||||||
from .app.gradio_app import *
|
from .app.gradio_app import *
|
||||||
from .app.qtfaststart import *
|
from .app.qtfaststart import *
|
||||||
|
|
||||||
|
from .cli import *
|
||||||
|
|
||||||
__version__ = _get_version()
|
__version__ = _get_version()
|
||||||
|
|||||||
@@ -24,9 +24,9 @@ Usage:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Standard Library Imports
|
# Standard Library Imports
|
||||||
import argparse
|
|
||||||
import os
|
import os
|
||||||
from glob import iglob
|
from glob import iglob
|
||||||
|
import re
|
||||||
from subprocess import run
|
from subprocess import run
|
||||||
from typing import TypeVar, Union
|
from typing import TypeVar, Union
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
@@ -93,7 +93,7 @@ class AutoTranscribe:
|
|||||||
|
|
||||||
print("AutoTranscribe initialized all models successfully loaded.")
|
print("AutoTranscribe initialized all models successfully loaded.")
|
||||||
|
|
||||||
def transcribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||||
remove_original : bool = False,
|
remove_original : bool = False,
|
||||||
**kwargs) -> Transcript:
|
**kwargs) -> Transcript:
|
||||||
"""
|
"""
|
||||||
@@ -164,6 +164,55 @@ class AutoTranscribe:
|
|||||||
|
|
||||||
return Transcript(final_transcript)
|
return Transcript(final_transcript)
|
||||||
|
|
||||||
|
def diarization(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||||
|
**kwargs) -> dict:
|
||||||
|
"""
|
||||||
|
Perform diarization on an audio file using the pyannote diarization model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
audio_file (Union[str, torch.Tensor, ndarray]):
|
||||||
|
The audio source which can either be a path to the audio file or a tensor representation.
|
||||||
|
**kwargs:
|
||||||
|
Additional keyword arguments for diarization.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict:
|
||||||
|
A dictionary containing the results of the diarization process.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get audio file as an AudioProcessor object
|
||||||
|
audio_file = self.get_audio_file(audio_file)
|
||||||
|
|
||||||
|
# Prepare waveform and sample rate for diarization
|
||||||
|
dia_audio = {
|
||||||
|
"waveform" : audio_file.waveform.reshape(1,len(audio_file.waveform)),
|
||||||
|
"sample_rate": audio_file.sr
|
||||||
|
}
|
||||||
|
|
||||||
|
print("Starting diarisation.")
|
||||||
|
|
||||||
|
diarisation = self.diariser.diarization(dia_audio, **kwargs)
|
||||||
|
|
||||||
|
return diarisation
|
||||||
|
|
||||||
|
def transcribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||||
|
**kwargs):
|
||||||
|
"""
|
||||||
|
Transcribe the provided audio file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
audio_file (Union[str, torch.Tensor, ndarray]):
|
||||||
|
The audio source, which can either be a path or a tensor representation.
|
||||||
|
**kwargs:
|
||||||
|
Additional keyword arguments for transcription.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str:
|
||||||
|
The transcribed text from the audio source.
|
||||||
|
"""
|
||||||
|
audio_file = self.get_audio_file(audio_file)
|
||||||
|
|
||||||
|
return self.transcriber.transcribe(audio_file.waveform, **kwargs)
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_audio_file(audio_file : str,
|
def remove_audio_file(audio_file : str,
|
||||||
shred : bool = False) -> None:
|
shred : bool = False) -> None:
|
||||||
@@ -228,133 +277,3 @@ class AutoTranscribe:
|
|||||||
raise ValueError(f'Audiofile must be of type AudioProcessor,' \
|
raise ValueError(f'Audiofile must be of type AudioProcessor,' \
|
||||||
f'not {type(audio_file)}')
|
f'not {type(audio_file)}')
|
||||||
return audio_file
|
return audio_file
|
||||||
|
|
||||||
|
|
||||||
def cli():
|
|
||||||
"""
|
|
||||||
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe
|
|
||||||
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
|
|
||||||
output formats, and other options necessary for transcription.
|
|
||||||
|
|
||||||
This function can be executed from the command line to perform transcription tasks, providing a
|
|
||||||
user-friendly way to access the AutoTranscribe class functionalities.
|
|
||||||
"""
|
|
||||||
from whisper import available_models
|
|
||||||
from whisper.utils import get_writer
|
|
||||||
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
|
||||||
from .transcriber import WHISPER_DEFAULT_PATH
|
|
||||||
from .diarisation import PYANNOTE_DEFAULT_PATH
|
|
||||||
|
|
||||||
def str2bool(string):
|
|
||||||
str2val = {"True": True, "False": False}
|
|
||||||
if string in str2val:
|
|
||||||
return str2val[string]
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
||||||
|
|
||||||
parser.add_argument("-f","--audio_files", nargs="+", type=str,
|
|
||||||
help="List of audio files to transcribe.")
|
|
||||||
|
|
||||||
parser.add_argument('--start_server', action='store_true',
|
|
||||||
help='Start the Gradio app.')
|
|
||||||
|
|
||||||
parser.add_argument("--whisper_model_name", default="medium",
|
|
||||||
help="Name of the Whisper model to use.")
|
|
||||||
|
|
||||||
parser.add_argument("--whisper_model_directory", type=str, default=WHISPER_DEFAULT_PATH,
|
|
||||||
help="Path to save Whisper model files; defaults to ./models/whisper.")
|
|
||||||
|
|
||||||
parser.add_argument("--diarization_directory", type=str, default=PYANNOTE_DEFAULT_PATH,
|
|
||||||
help="Path to the diarization model directory.")
|
|
||||||
|
|
||||||
parser.add_argument("--huggingface_token", default="", type=str,
|
|
||||||
help="HuggingFace token for private model download.")
|
|
||||||
|
|
||||||
parser.add_argument("--allow_download", type=str2bool, default=False,
|
|
||||||
help="Allow model download if not found locally.")
|
|
||||||
|
|
||||||
parser.add_argument("--inference_device",
|
|
||||||
default="cuda" if torch.cuda.is_available() else "cpu",
|
|
||||||
help="Device to use for PyTorch inference.")
|
|
||||||
|
|
||||||
parser.add_argument("--num_threads", type=int, default=0,
|
|
||||||
help="Number of threads used by torch for CPU inference; overrides MKL_NUM_THREADS/OMP_NUM_THREADS.")
|
|
||||||
|
|
||||||
parser.add_argument("--output_directory", "-o", type=str, default=".",
|
|
||||||
help="Directory to save the transcription outputs.")
|
|
||||||
|
|
||||||
parser.add_argument("--output_format", "-f", type=str, default="txt",
|
|
||||||
choices=["txt", "json", "md", "html"],
|
|
||||||
help="Format of the output file; defaults to txt.")
|
|
||||||
|
|
||||||
parser.add_argument("--verbose_output", type=str2bool, default=True,
|
|
||||||
help="Enable or disable progress and debug messages.")
|
|
||||||
|
|
||||||
parser.add_argument("--transcription_task", type=str, default="transcribe",
|
|
||||||
choices=["transcribe", "diarize", "wtranscribe"],
|
|
||||||
help="Choose to perform transcription, diarization, or Whisper transcription.")
|
|
||||||
|
|
||||||
parser.add_argument("--spoken_language", type=str, default=None,
|
|
||||||
choices=sorted(LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
|
|
||||||
help="Language spoken in the audio. Specify None to perform language detection.")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
output_directory = args.output_directory
|
|
||||||
num_threads = args.num_threads
|
|
||||||
whisper_model_directory = args.whisper_model_directory
|
|
||||||
allow_download = args.allow_download
|
|
||||||
inference_device = args.inference_device
|
|
||||||
whisper_model_name = args.whisper_model_name
|
|
||||||
diarization_directory = args.diarization_directory
|
|
||||||
huggingface_token = args.huggingface_token
|
|
||||||
transcription_task = args.transcription_task
|
|
||||||
audio_files = args.audio_files
|
|
||||||
spoken_language = args.spoken_language
|
|
||||||
output_format = args.output_format
|
|
||||||
start_server = args.start_server
|
|
||||||
|
|
||||||
os.makedirs(output_directory, exist_ok=True)
|
|
||||||
|
|
||||||
if num_threads > 0:
|
|
||||||
torch.set_num_threads(num_threads)
|
|
||||||
|
|
||||||
whisper_kwargs = {
|
|
||||||
"download_root": whisper_model_directory,
|
|
||||||
"local": allow_download,
|
|
||||||
"device": inference_device
|
|
||||||
}
|
|
||||||
|
|
||||||
diarisation_kwargs = {
|
|
||||||
"local": allow_download,
|
|
||||||
"token": huggingface_token
|
|
||||||
}
|
|
||||||
|
|
||||||
model = AutoTranscribe(whisper_model=whisper_model_name,
|
|
||||||
whisper_kwargs=whisper_kwargs,
|
|
||||||
dia_model=diarization_directory,
|
|
||||||
dia_kwargs=diarisation_kwargs)
|
|
||||||
|
|
||||||
if transcription_task == "transcribe":
|
|
||||||
for audio in audio_files:
|
|
||||||
out = model.transcribe(audio, language=spoken_language)
|
|
||||||
basename = audio.split("/")[-1].split(".")[0]
|
|
||||||
spath = f"{output_directory}/{basename}.{output_format}"
|
|
||||||
out.save(spath)
|
|
||||||
|
|
||||||
# ... include other tasks here ...
|
|
||||||
elif transcription_task == "diarize":
|
|
||||||
# diarize code here
|
|
||||||
pass
|
|
||||||
elif transcription_task == "wtranscribe":
|
|
||||||
# wtranscribe code here
|
|
||||||
pass
|
|
||||||
|
|
||||||
if start_server:
|
|
||||||
from .app.gradio_app import gradio_app
|
|
||||||
gradio_app(model)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
cli()
|
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
"""
|
||||||
|
Command-Line Interface (CLI) for the AutoTranscribe class,
|
||||||
|
allowing for user interaction to transcribe and diarize audio files.
|
||||||
|
The function includes arguments for specifying the audio files, model paths,
|
||||||
|
output formats, and other options necessary for transcription.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .transcriber import WHISPER_DEFAULT_PATH
|
||||||
|
from .diarisation import PYANNOTE_DEFAULT_PATH
|
||||||
|
from .autotranscript import AutoTranscribe
|
||||||
|
|
||||||
|
from whisper import available_models
|
||||||
|
from whisper.utils import get_writer
|
||||||
|
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
||||||
|
from torch.cuda import is_available
|
||||||
|
from torch import set_num_threads
|
||||||
|
|
||||||
|
|
||||||
|
def cli():
|
||||||
|
"""
|
||||||
|
Command-Line Interface (CLI) for the AutoTranscribe class, allowing for user interaction to transcribe
|
||||||
|
and diarize audio files. The function includes arguments for specifying the audio files, model paths,
|
||||||
|
output formats, and other options necessary for transcription.
|
||||||
|
|
||||||
|
This function can be executed from the command line to perform transcription tasks, providing a
|
||||||
|
user-friendly way to access the AutoTranscribe class functionalities.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def str2bool(string):
|
||||||
|
str2val = {"True": True, "False": False}
|
||||||
|
if string in str2val:
|
||||||
|
return str2val[string]
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
|
||||||
|
|
||||||
|
parser = ArgumentParser(formatter_class = ArgumentDefaultsHelpFormatter)
|
||||||
|
|
||||||
|
group = parser.add_mutually_exclusive_group()
|
||||||
|
|
||||||
|
parser.add_argument("-f","--audio_files", nargs="+", type=str, default=None,
|
||||||
|
help="List of audio files to transcribe.")
|
||||||
|
|
||||||
|
group.add_argument('--start_server', action='store_true',
|
||||||
|
help='Start the Gradio app.')
|
||||||
|
|
||||||
|
parser.add_argument("--port", type=int, default= None,
|
||||||
|
help="Port to run the Gradio app on.")
|
||||||
|
|
||||||
|
parser.add_argument("--server_name", type=str, default= "autotranscript",
|
||||||
|
help="Name of the Gradio app.")
|
||||||
|
|
||||||
|
parser.add_argument("--whisper_model_name", default="medium",
|
||||||
|
help="Name of the Whisper model to use.")
|
||||||
|
|
||||||
|
parser.add_argument("--whisper_model_directory", type=str, default= None,
|
||||||
|
help="Path to save Whisper model files; defaults to ./models/whisper.")
|
||||||
|
|
||||||
|
parser.add_argument("--diarization_directory", type=str, default= None,
|
||||||
|
help="Path to the diarization model directory.")
|
||||||
|
|
||||||
|
parser.add_argument("--huggingface_token", default= None, type=str,
|
||||||
|
help="HuggingFace token for private model download.")
|
||||||
|
|
||||||
|
parser.add_argument("--allow_download", type=str2bool, default=True,
|
||||||
|
help="Allow model download if not found locally.")
|
||||||
|
|
||||||
|
parser.add_argument("--inference_device",
|
||||||
|
default="cuda" if is_available() else "cpu",
|
||||||
|
help="Device to use for PyTorch inference.")
|
||||||
|
|
||||||
|
parser.add_argument("--num_threads", type=int, default=0,
|
||||||
|
help="Number of threads used by torch for CPU inference; overrides MKL_NUM_THREADS/OMP_NUM_THREADS.")
|
||||||
|
|
||||||
|
parser.add_argument("--output_directory", "-o", type=str, default=".",
|
||||||
|
help="Directory to save the transcription outputs.")
|
||||||
|
|
||||||
|
parser.add_argument("--output_format", "-of", type=str, default="txt",
|
||||||
|
choices=["txt", "json", "md", "html"],
|
||||||
|
help="Format of the output file; defaults to txt.")
|
||||||
|
|
||||||
|
parser.add_argument("--verbose_output", type=str2bool, default=True,
|
||||||
|
help="Enable or disable progress and debug messages.")
|
||||||
|
|
||||||
|
parser.add_argument("--task", type=str, default= None, # unifinished code
|
||||||
|
choices=["autotranscribe", "diarization", "autotranscribe+translate", "translate"],
|
||||||
|
help="Choose to perform transcription, diarization, or translation. \
|
||||||
|
If set to translate, the language argument must be specified.")
|
||||||
|
|
||||||
|
parser.add_argument("--language", type=str, default=None,
|
||||||
|
choices=sorted(LANGUAGES.keys()) + sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
|
||||||
|
help="Language spoken in the audio. Specify None to perform language detection.")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
arg_dict = vars(args)
|
||||||
|
|
||||||
|
# configure output
|
||||||
|
out_folder = arg_dict.pop("output_directory")
|
||||||
|
os.makedirs(out_folder, exist_ok=True)
|
||||||
|
|
||||||
|
out_format = arg_dict.pop("output_format")
|
||||||
|
|
||||||
|
# seup server arg:
|
||||||
|
start_server = arg_dict.pop("start_server")
|
||||||
|
|
||||||
|
task = arg_dict.pop("task")
|
||||||
|
|
||||||
|
if args.num_threads > 0:
|
||||||
|
set_num_threads(arg_dict.pop("num_threads"))
|
||||||
|
|
||||||
|
class_kwargs = dict()
|
||||||
|
|
||||||
|
for k, v in arg_dict.items():
|
||||||
|
if v is not None:
|
||||||
|
class_kwargs[k] = v
|
||||||
|
|
||||||
|
|
||||||
|
model = AutoTranscribe(**class_kwargs)
|
||||||
|
|
||||||
|
if arg_dict["audio_files"]:
|
||||||
|
audio_files = args.pop("audio_files")
|
||||||
|
|
||||||
|
if task == "autotranscribe" or task == "autotranscribe+translate":
|
||||||
|
for audio in audio_files:
|
||||||
|
if task == "autotranscribe+translate":
|
||||||
|
task = "translate"
|
||||||
|
else:
|
||||||
|
task = "transcribe"
|
||||||
|
|
||||||
|
out = model.autotranscribe(audio,task = task, language=arg_dict.pop("language"), verbose = arg_dict.pop("verbose_output"))
|
||||||
|
basename = audio.split("/")[-1].split(".")[0]
|
||||||
|
out.save(os.path.join(out_folder, f"{basename}.{out_format}"))
|
||||||
|
|
||||||
|
elif task == "diarization":
|
||||||
|
for audio in audio_files:
|
||||||
|
if arg_dict.pop("verbose_output"):
|
||||||
|
print(f"Verbose not implemented for diarization.")
|
||||||
|
|
||||||
|
out = model.diarization(audio)
|
||||||
|
basename = audio.split("/")[-1].split(".")[0]
|
||||||
|
path = os.path.join(out_folder, f"{basename}.{out_format}")
|
||||||
|
if out_format == "txt":
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(out)
|
||||||
|
elif out_format == "json":
|
||||||
|
with open(path, "w") as f:
|
||||||
|
json.dump(json.dumps(out, indent= 3), f)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Unsupported output format for diarization{out_format}.")
|
||||||
|
elif task == "transcribe" or task == "translate":
|
||||||
|
|
||||||
|
for audio in audio_files:
|
||||||
|
|
||||||
|
out = model.transcribe(audio, task = task,
|
||||||
|
language=arg_dict.pop("language"),
|
||||||
|
verbose = arg_dict.pop("verbose_output"))
|
||||||
|
basename = audio.split("/")[-1].split(".")[0]
|
||||||
|
path = os.path.join(out_folder, f"{basename}.{out_format}")
|
||||||
|
with open(path, "w") as f:
|
||||||
|
f.write(out)
|
||||||
|
|
||||||
|
|
||||||
|
if start_server: # unfinished code
|
||||||
|
from .gradio_app import gradio_app
|
||||||
|
gradio_app(model)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
cli()
|
||||||
@@ -167,6 +167,9 @@ class Transcriber:
|
|||||||
|
|
||||||
whisper_kwargs = {k: v for k, v in kwargs.items() if k in _possible_kwargs}
|
whisper_kwargs = {k: v for k, v in kwargs.items() if k in _possible_kwargs}
|
||||||
|
|
||||||
|
if (task := kwargs.get("task")):
|
||||||
|
whisper_kwargs["task"] = task
|
||||||
|
|
||||||
return whisper_kwargs
|
return whisper_kwargs
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
|
|||||||
+59
-10
@@ -1,4 +1,3 @@
|
|||||||
name: whisper
|
|
||||||
channels:
|
channels:
|
||||||
- pytorch
|
- pytorch
|
||||||
- defaults
|
- defaults
|
||||||
@@ -11,7 +10,7 @@ dependencies:
|
|||||||
- ca-certificates=2023.05.30=h06a4308_0
|
- ca-certificates=2023.05.30=h06a4308_0
|
||||||
- certifi=2023.5.7=py39h06a4308_0
|
- certifi=2023.5.7=py39h06a4308_0
|
||||||
- cffi=1.15.1=py39h5eee18b_3
|
- cffi=1.15.1=py39h5eee18b_3
|
||||||
- cryptography=39.0.1=py39h9ce1e76_0
|
- cryptography=39.0.1=py39h9ce1e76_2
|
||||||
- cudatoolkit=11.3.1=h2bc3f7f_2
|
- cudatoolkit=11.3.1=h2bc3f7f_2
|
||||||
- ffmpeg=4.2.2=h20bf706_0
|
- ffmpeg=4.2.2=h20bf706_0
|
||||||
- flit-core=3.8.0=py39h06a4308_0
|
- flit-core=3.8.0=py39h06a4308_0
|
||||||
@@ -51,36 +50,40 @@ dependencies:
|
|||||||
- numpy=1.23.5=py39h14f4228_0
|
- numpy=1.23.5=py39h14f4228_0
|
||||||
- numpy-base=1.23.5=py39h31eccc5_0
|
- numpy-base=1.23.5=py39h31eccc5_0
|
||||||
- openh264=2.1.1=h4ff587b_0
|
- openh264=2.1.1=h4ff587b_0
|
||||||
- openssl=1.1.1t=h7f8727e_0
|
- openssl=3.0.9=h7f8727e_0
|
||||||
- pillow=9.4.0=py39h6a678d5_0
|
- pillow=9.4.0=py39h6a678d5_0
|
||||||
- pip=23.0.1=py39h06a4308_0
|
- pip=23.0.1=py39h06a4308_0
|
||||||
- pycparser=2.21=pyhd3eb1b0_0
|
- pycparser=2.21=pyhd3eb1b0_0
|
||||||
- pyopenssl=23.0.0=py39h06a4308_0
|
- pyopenssl=23.0.0=py39h06a4308_0
|
||||||
- pysocks=1.7.1=py39h06a4308_0
|
- pysocks=1.7.1=py39h06a4308_0
|
||||||
- python=3.9.16=h7a1cb2a_2
|
- python=3.9.16=h955ad1f_3
|
||||||
- pytorch=1.11.0=py3.9_cuda11.3_cudnn8.2.0_0
|
- pytorch=1.11.0=py3.9_cuda11.3_cudnn8.2.0_0
|
||||||
- pytorch-mutex=1.0=cuda
|
- pytorch-mutex=1.0=cuda
|
||||||
- readline=8.2=h5eee18b_0
|
- readline=8.2=h5eee18b_0
|
||||||
- requests=2.28.1=py39h06a4308_1
|
- requests=2.28.1=py39h06a4308_1
|
||||||
- setuptools=65.6.3=py39h06a4308_0
|
- setuptools=65.6.3=py39h06a4308_0
|
||||||
- six=1.16.0=pyhd3eb1b0_1
|
- six=1.16.0=pyhd3eb1b0_1
|
||||||
- sqlite=3.41.1=h5eee18b_0
|
- sqlite=3.41.2=h5eee18b_0
|
||||||
- tk=8.6.12=h1ccaba5_0
|
- tk=8.6.12=h1ccaba5_0
|
||||||
- torchaudio=0.11.0=py39_cu113
|
- torchaudio=0.11.0=py39_cu113
|
||||||
- torchvision=0.12.0=py39_cu113
|
- torchvision=0.12.0=py39_cu113
|
||||||
- typing_extensions=4.4.0=py39h06a4308_0
|
|
||||||
- tzdata=2023c=h04d1e81_0
|
- tzdata=2023c=h04d1e81_0
|
||||||
- wheel=0.38.4=py39h06a4308_0
|
- wheel=0.38.4=py39h06a4308_0
|
||||||
- x264=1!157.20191217=h7b6447c_0
|
- x264=1!157.20191217=h7b6447c_0
|
||||||
- xz=5.2.10=h5eee18b_1
|
- xz=5.4.2=h5eee18b_0
|
||||||
- zlib=1.2.13=h5eee18b_0
|
- zlib=1.2.13=h5eee18b_0
|
||||||
- zstd=1.5.4=hc292b87_0
|
- zstd=1.5.4=hc292b87_0
|
||||||
- pip:
|
- pip:
|
||||||
- absl-py==1.3.0
|
- absl-py==1.3.0
|
||||||
|
- aiofiles==23.1.0
|
||||||
- aiohttp==3.8.3
|
- aiohttp==3.8.3
|
||||||
- aiosignal==1.3.1
|
- aiosignal==1.3.1
|
||||||
- alembic==1.9.1
|
- alembic==1.9.1
|
||||||
|
- altair==5.0.1
|
||||||
|
- annotated-types==0.5.0
|
||||||
|
- ansi2html==1.8.0
|
||||||
- antlr4-python3-runtime==4.9.3
|
- antlr4-python3-runtime==4.9.3
|
||||||
|
- anyio==3.7.1
|
||||||
- appdirs==1.4.4
|
- appdirs==1.4.4
|
||||||
- asteroid-filterbanks==0.4.0
|
- asteroid-filterbanks==0.4.0
|
||||||
- async-timeout==4.0.2
|
- async-timeout==4.0.2
|
||||||
@@ -100,48 +103,76 @@ dependencies:
|
|||||||
- commonmark==0.9.1
|
- commonmark==0.9.1
|
||||||
- contourpy==1.0.6
|
- contourpy==1.0.6
|
||||||
- cycler==0.11.0
|
- cycler==0.11.0
|
||||||
|
- dash==2.12.1
|
||||||
|
- dash-core-components==2.0.0
|
||||||
|
- dash-html-components==2.0.0
|
||||||
|
- dash-table==5.0.0
|
||||||
- decorator==4.4.2
|
- decorator==4.4.2
|
||||||
- docopt==0.6.2
|
- docopt==0.6.2
|
||||||
- einops==0.3.2
|
- einops==0.3.2
|
||||||
|
- exceptiongroup==1.1.1
|
||||||
|
- fastapi==0.100.0
|
||||||
- ffmpeg-python==0.2.0
|
- ffmpeg-python==0.2.0
|
||||||
|
- ffmpy==0.3.0
|
||||||
- filelock==3.8.0
|
- filelock==3.8.0
|
||||||
|
- flask==2.2.5
|
||||||
- fonttools==4.38.0
|
- fonttools==4.38.0
|
||||||
- frozenlist==1.3.3
|
- frozenlist==1.3.3
|
||||||
- fsspec==2022.11.0
|
- fsspec==2022.11.0
|
||||||
- future==0.18.2
|
- future==0.18.2
|
||||||
- google-auth==2.15.0
|
- google-auth==2.15.0
|
||||||
- google-auth-oauthlib==0.4.6
|
- google-auth-oauthlib==0.4.6
|
||||||
|
- gradio==3.36.1
|
||||||
|
- gradio-client==0.2.7
|
||||||
- greenlet==2.0.1
|
- greenlet==2.0.1
|
||||||
- grpcio==1.51.1
|
- grpcio==1.51.1
|
||||||
|
- h11==0.14.0
|
||||||
- hmmlearn==0.2.8
|
- hmmlearn==0.2.8
|
||||||
- huggingface-hub==0.11.0
|
- httpcore==0.17.3
|
||||||
|
- httpx==0.24.1
|
||||||
|
- huggingface-hub==0.16.4
|
||||||
|
- humanize==4.7.0
|
||||||
- hyperpyyaml==1.1.0
|
- hyperpyyaml==1.1.0
|
||||||
- imageio==2.23.0
|
- imageio==2.23.0
|
||||||
- imageio-ffmpeg==0.4.7
|
- imageio-ffmpeg==0.4.7
|
||||||
- importlib-metadata==4.13.0
|
- importlib-metadata==4.13.0
|
||||||
|
- importlib-resources==5.12.0
|
||||||
|
- iniconfig==2.0.0
|
||||||
|
- itsdangerous==2.1.2
|
||||||
|
- jinja2==3.1.2
|
||||||
- joblib==1.2.0
|
- joblib==1.2.0
|
||||||
|
- jsonschema==4.18.0
|
||||||
|
- jsonschema-specifications==2023.6.1
|
||||||
- julius==0.2.7
|
- julius==0.2.7
|
||||||
- kiwisolver==1.4.4
|
- kiwisolver==1.4.4
|
||||||
- librosa==0.9.2
|
- librosa==0.9.2
|
||||||
|
- linkify-it-py==2.0.2
|
||||||
- lit==16.0.5.post0
|
- lit==16.0.5.post0
|
||||||
- llvmlite==0.39.1
|
- llvmlite==0.39.1
|
||||||
- mako==1.2.4
|
- mako==1.2.4
|
||||||
- markdown==3.4.1
|
- markdown==3.4.1
|
||||||
|
- markdown-it-py==2.2.0
|
||||||
- markupsafe==2.1.1
|
- markupsafe==2.1.1
|
||||||
- matplotlib==3.6.2
|
- matplotlib==3.7.1
|
||||||
|
- mdit-py-plugins==0.3.3
|
||||||
|
- mdurl==0.1.2
|
||||||
- more-itertools==9.0.0
|
- more-itertools==9.0.0
|
||||||
- moviepy==1.0.3
|
- moviepy==1.0.3
|
||||||
- mpmath==1.2.1
|
- mpmath==1.2.1
|
||||||
- multidict==6.0.4
|
- multidict==6.0.4
|
||||||
|
- nest-asyncio==1.5.7
|
||||||
- networkx==2.8.8
|
- networkx==2.8.8
|
||||||
- numba==0.56.4
|
- numba==0.56.4
|
||||||
- oauthlib==3.2.2
|
- oauthlib==3.2.2
|
||||||
- omegaconf==2.3.0
|
- omegaconf==2.3.0
|
||||||
- openai-whisper==20230314
|
- openai-whisper==20230314
|
||||||
- optuna==3.0.5
|
- optuna==3.0.5
|
||||||
|
- orjson==3.9.2
|
||||||
- packaging==21.3
|
- packaging==21.3
|
||||||
- pandas==1.5.2
|
- pandas==1.5.2
|
||||||
- pbr==5.11.0
|
- pbr==5.11.0
|
||||||
|
- plotly==5.15.0
|
||||||
|
- pluggy==1.0.0
|
||||||
- pooch==1.6.0
|
- pooch==1.6.0
|
||||||
- prettytable==3.5.0
|
- prettytable==3.5.0
|
||||||
- primepy==1.3
|
- primepy==1.3
|
||||||
@@ -154,23 +185,32 @@ dependencies:
|
|||||||
- pyannote-pipeline==2.3
|
- pyannote-pipeline==2.3
|
||||||
- pyasn1==0.4.8
|
- pyasn1==0.4.8
|
||||||
- pyasn1-modules==0.2.8
|
- pyasn1-modules==0.2.8
|
||||||
|
- pydantic==2.0.2
|
||||||
|
- pydantic-core==2.1.2
|
||||||
- pydeprecate==0.3.2
|
- pydeprecate==0.3.2
|
||||||
- pydub==0.25.1
|
- pydub==0.25.1
|
||||||
- pygments==2.13.0
|
- pygments==2.13.0
|
||||||
- pyparsing==3.0.9
|
- pyparsing==3.0.9
|
||||||
- pyperclip==1.8.2
|
- pyperclip==1.8.2
|
||||||
|
- pytest==7.3.1
|
||||||
- python-dateutil==2.8.2
|
- python-dateutil==2.8.2
|
||||||
|
- python-multipart==0.0.6
|
||||||
- pytorch-lightning==1.6.5
|
- pytorch-lightning==1.6.5
|
||||||
- pytorch-metric-learning==1.6.3
|
- pytorch-metric-learning==1.6.3
|
||||||
- pytz==2022.7
|
- pytz==2022.7
|
||||||
- pyyaml==6.0
|
- pyyaml==6.0
|
||||||
|
- qtfaststart==1.8
|
||||||
|
- referencing==0.29.1
|
||||||
- regex==2022.10.31
|
- regex==2022.10.31
|
||||||
- requests-oauthlib==1.3.1
|
- requests-oauthlib==1.3.1
|
||||||
- resampy==0.4.2
|
- resampy==0.4.2
|
||||||
|
- retrying==1.3.4
|
||||||
- rich==12.6.0
|
- rich==12.6.0
|
||||||
|
- rpds-py==0.8.10
|
||||||
- rsa==4.9
|
- rsa==4.9
|
||||||
- ruamel-yaml==0.17.21
|
- ruamel-yaml==0.17.21
|
||||||
- ruamel-yaml-clib==0.2.7
|
- ruamel-yaml-clib==0.2.7
|
||||||
|
- ruff==0.0.272
|
||||||
- scikit-learn==1.2.0
|
- scikit-learn==1.2.0
|
||||||
- scipy==1.8.1
|
- scipy==1.8.1
|
||||||
- semantic-version==2.10.0
|
- semantic-version==2.10.0
|
||||||
@@ -180,19 +220,24 @@ dependencies:
|
|||||||
- shellingham==1.5.0
|
- shellingham==1.5.0
|
||||||
- simplejson==3.18.0
|
- simplejson==3.18.0
|
||||||
- singledispatchmethod==1.0
|
- singledispatchmethod==1.0
|
||||||
|
- sniffio==1.3.0
|
||||||
- sortedcontainers==2.4.0
|
- sortedcontainers==2.4.0
|
||||||
- soundfile==0.10.3.post1
|
- soundfile==0.10.3.post1
|
||||||
- speechbrain==0.5.13
|
- speechbrain==0.5.14
|
||||||
- sqlalchemy==1.4.45
|
- sqlalchemy==1.4.45
|
||||||
|
- starlette==0.27.0
|
||||||
- stevedore==4.1.1
|
- stevedore==4.1.1
|
||||||
- sympy==1.11.1
|
- sympy==1.11.1
|
||||||
- tabulate==0.9.0
|
- tabulate==0.9.0
|
||||||
|
- tenacity==8.2.2
|
||||||
- tensorboard==2.11.0
|
- tensorboard==2.11.0
|
||||||
- tensorboard-data-server==0.6.1
|
- tensorboard-data-server==0.6.1
|
||||||
- tensorboard-plugin-wit==1.8.1
|
- tensorboard-plugin-wit==1.8.1
|
||||||
- threadpoolctl==3.1.0
|
- threadpoolctl==3.1.0
|
||||||
- tiktoken==0.3.1
|
- tiktoken==0.3.1
|
||||||
- tokenizers==0.13.2
|
- tokenizers==0.13.2
|
||||||
|
- tomli==2.0.1
|
||||||
|
- toolz==0.12.0
|
||||||
- torch-audiomentations==0.11.0
|
- torch-audiomentations==0.11.0
|
||||||
- torch-pitch-shift==1.2.2
|
- torch-pitch-shift==1.2.2
|
||||||
- torchmetrics==0.11.0
|
- torchmetrics==0.11.0
|
||||||
@@ -200,8 +245,12 @@ dependencies:
|
|||||||
- transformers==4.24.0
|
- transformers==4.24.0
|
||||||
- triton==2.0.0
|
- triton==2.0.0
|
||||||
- typer==0.7.0
|
- typer==0.7.0
|
||||||
|
- typing-extensions==4.7.1
|
||||||
|
- uc-micro-py==1.0.2
|
||||||
- urllib3==1.26.12
|
- urllib3==1.26.12
|
||||||
|
- uvicorn==0.22.0
|
||||||
- wcwidth==0.2.5
|
- wcwidth==0.2.5
|
||||||
|
- websockets==11.0.3
|
||||||
- werkzeug==2.2.2
|
- werkzeug==2.2.2
|
||||||
- yarl==1.8.2
|
- yarl==1.8.2
|
||||||
- zipp==3.11.0
|
- zipp==3.11.0
|
||||||
|
|||||||
@@ -11,6 +11,14 @@ setuptools-rust~=1.5.2
|
|||||||
|
|
||||||
tqdm>=4.65.0
|
tqdm>=4.65.0
|
||||||
|
|
||||||
|
gradio~=3.36.1
|
||||||
|
gradio-client~=0.2.7
|
||||||
|
|
||||||
|
# add pytorch to override the one installed by pyannote.audio
|
||||||
|
|
||||||
|
torch~=1.11.0
|
||||||
|
torchvision~=0.12.0
|
||||||
|
torchaudio~=0.11.0
|
||||||
#optional:
|
#optional:
|
||||||
#dash~=2.10.2
|
#dash~=2.10.2
|
||||||
|
|
||||||
|
|||||||
@@ -26,16 +26,19 @@ if __name__ == "__main__":
|
|||||||
name=module_name,
|
name=module_name,
|
||||||
version=version["get_version"](build_version),
|
version=version["get_version"](build_version),
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
python_requires="~=3.9",
|
python_requires=">=3.8",
|
||||||
readme="README.md",
|
readme="README.md",
|
||||||
install_requires = [str(r) for r in pkg_resources.parse_requirements(
|
install_requires = [str(r) for r in pkg_resources.parse_requirements(
|
||||||
open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
|
open(os.path.join(os.path.dirname(__file__), "requirements.txt"))
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
|
dependency_links=[
|
||||||
|
'https://download.pytorch.org/whl/cu113',
|
||||||
|
],
|
||||||
url= github_url,
|
url= github_url,
|
||||||
license='',
|
license='',
|
||||||
author='Jacob Schmieder',
|
author='Jacob Schmieder',
|
||||||
author_email='',
|
author_email='Jacob.Schmieder@dbfz.de',
|
||||||
description='Transcription tool for audio files based on Whisper and Pyannote',
|
description='Transcription tool for audio files based on Whisper and Pyannote',
|
||||||
entry_points={'console_scripts':
|
entry_points={'console_scripts':
|
||||||
['autotranscript = autotranscript.autotranscript:cli']}
|
['autotranscript = autotranscript.autotranscript:cli']}
|
||||||
|
|||||||
Reference in New Issue
Block a user