Merge pull request #37 from JSchmie/develop_gradio_app
Develop gradio app
This commit is contained in:
@@ -0,0 +1,6 @@
|
|||||||
|
scraibe/*__pycache__
|
||||||
|
scraibe/app/*__pycache__
|
||||||
|
scraibe/.pyannotetoken
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
.github
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
transcibe.py
|
||||||
|
scraibe/*__pycache__
|
||||||
|
scraibe/app/*__pycache__
|
||||||
|
scraibe/.pyannotetoken
|
||||||
|
|
||||||
|
|
||||||
@@ -7,9 +7,6 @@ from .diarisation import *
|
|||||||
from .version import get_version as _get_version
|
from .version import get_version as _get_version
|
||||||
from .misc import *
|
from .misc import *
|
||||||
|
|
||||||
from .app.gradio_app import *
|
|
||||||
from .app.qtfaststart import *
|
|
||||||
|
|
||||||
from .cli import *
|
from .cli import *
|
||||||
|
|
||||||
__version__ = _get_version()
|
__version__ = _get_version()
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
from .qtfaststart import *
|
|
||||||
from .gradio_app import *
|
|
||||||
@@ -1,441 +0,0 @@
|
|||||||
"""
|
|
||||||
Gradio Audio Transcription App.
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
This module provides an interface to transcribe audio files using the
|
|
||||||
Scraibe model. Users can either upload an audio file or record their speech
|
|
||||||
live for transcription. The application supports multiple languages and provides
|
|
||||||
options to specify the number of speakers and the language of the audio.
|
|
||||||
|
|
||||||
Attributes:
|
|
||||||
LANGUAGES (list): A list of supported languages for transcription.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
Run this script to start the Gradio web interface for audio transcription.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
"""
|
|
||||||
Gradio Audio Transcription App.
|
|
||||||
--------------------------------
|
|
||||||
|
|
||||||
This module provides an interface to transcribe audio files using the
|
|
||||||
Scraibe model. Users can either upload an audio file or record their speech
|
|
||||||
live for transcription. The application supports multiple languages and provides
|
|
||||||
options to specify the number of speakers and the language of the audio.
|
|
||||||
|
|
||||||
Attributes:
|
|
||||||
LANGUAGES (list): A list of supported languages for transcription.
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
Run this script to start the Gradio web interface for audio transcription.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
import gradio as gr
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from scraibe import Scraibe, Transcript
|
|
||||||
|
|
||||||
theme = gr.themes.Soft(
|
|
||||||
primary_hue="green",
|
|
||||||
secondary_hue='orange',
|
|
||||||
neutral_hue="gray",
|
|
||||||
)
|
|
||||||
|
|
||||||
LANGUAGES = [
|
|
||||||
"Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian",
|
|
||||||
"Bosnian", "Bulgarian", "Catalan", "Chinese", "Croatian",
|
|
||||||
"Czech", "Danish", "Dutch", "English", "Estonian",
|
|
||||||
"Finnish", "French", "Galician", "German", "Greek",
|
|
||||||
"Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian",
|
|
||||||
"Italian", "Japanese", "Kannada", "Kazakh", "Korean",
|
|
||||||
"Latvian", "Lithuanian", "Macedonian", "Malay", "Marathi",
|
|
||||||
"Maori", "Nepali", "Norwegian", "Persian", "Polish",
|
|
||||||
"Portuguese", "Romanian", "Russian", "Serbian", "Slovak",
|
|
||||||
"Slovenian", "Spanish", "Swahili", "Swedish", "Tagalog",
|
|
||||||
"Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
|
|
||||||
"Vietnamese", "Welsh"
|
|
||||||
]
|
|
||||||
|
|
||||||
CURRENT_PATH = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
|
|
||||||
class GradioTranscriptionInterface:
|
|
||||||
"""
|
|
||||||
Interface handling the interaction between Gradio UI and the Audio Transcription system.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, model: Scraibe):
|
|
||||||
"""
|
|
||||||
Initializes the GradioTranscriptionInterface with a transcription model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model (Scraibe): Model responsible for audio transcription tasks.
|
|
||||||
"""
|
|
||||||
self.model = model
|
|
||||||
|
|
||||||
def auto_transcribe(self, source,
|
|
||||||
num_speakers : int,
|
|
||||||
translation : bool,
|
|
||||||
language : str):
|
|
||||||
"""
|
|
||||||
Shortcut method for the Scraibe task.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
tuple: Transcribed text (str), JSON output (dict)
|
|
||||||
"""
|
|
||||||
|
|
||||||
kwargs = {
|
|
||||||
"num_speakers": num_speakers if num_speakers != 0 else None,
|
|
||||||
"language": language if language != "None" else None,
|
|
||||||
"task": 'translate' if translation else None
|
|
||||||
}
|
|
||||||
if isinstance(source, str):
|
|
||||||
try:
|
|
||||||
result = self.model.autotranscribe(source, **kwargs)
|
|
||||||
except ValueError:
|
|
||||||
raise gr.Error("Couldn't detect any speech in the provided audio. \
|
|
||||||
Please try again!")
|
|
||||||
|
|
||||||
return str(result), result.get_json()
|
|
||||||
|
|
||||||
elif isinstance(source, list):
|
|
||||||
source_names = [s.split("/")[-1] for s in source]
|
|
||||||
result = []
|
|
||||||
for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
|
|
||||||
try:
|
|
||||||
res = self.model.autotranscribe(s, **kwargs)
|
|
||||||
except ValueError:
|
|
||||||
_name = s.split("/")[-1]
|
|
||||||
res = f"NO TRANSCRIPT FOUND FOR {_name}"
|
|
||||||
gr.Warning(f"Couldn't detect any speech in {_name} will skip this file.")
|
|
||||||
result.append(res)
|
|
||||||
|
|
||||||
out = ''
|
|
||||||
out_dict = {}
|
|
||||||
for i, r in enumerate(result):
|
|
||||||
out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
|
|
||||||
out += str(r)
|
|
||||||
out += "\n\n"
|
|
||||||
|
|
||||||
if isinstance(r, str):
|
|
||||||
out_dict[source_names[i]] = r
|
|
||||||
else:
|
|
||||||
out_dict[source_names[i]] = r.get_dict()
|
|
||||||
|
|
||||||
return out, json.dumps(out_dict, indent=4)
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise gr.Error("Please provide a valid audio file.")
|
|
||||||
|
|
||||||
|
|
||||||
def transcribe(self, source, translation, language):
|
|
||||||
"""
|
|
||||||
Shortcut method for the Transcribe task.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Transcribed text.
|
|
||||||
"""
|
|
||||||
kwargs = {
|
|
||||||
"language": language if language != "None" else None,
|
|
||||||
"task": 'translate' if translation == "Yes" else None
|
|
||||||
}
|
|
||||||
|
|
||||||
if isinstance(source, str):
|
|
||||||
result = self.model.transcribe(source, **kwargs)
|
|
||||||
|
|
||||||
return str(result)
|
|
||||||
|
|
||||||
elif isinstance(source, list):
|
|
||||||
source_names = [s.split("/")[-1] for s in source]
|
|
||||||
result = []
|
|
||||||
for s in tqdm(source, total=len(source),desc = "Transcribing audio files"):
|
|
||||||
res = self.model.transcribe(s, **kwargs)
|
|
||||||
result.append(res)
|
|
||||||
|
|
||||||
out = ''
|
|
||||||
for i, res in enumerate(result):
|
|
||||||
out += f"TRANSCRIPT FOR {source_names[i]}:\n\n"
|
|
||||||
out += str(res)
|
|
||||||
out += "\n\n"
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise gr.Error("Please provide a valid audio file.")
|
|
||||||
|
|
||||||
def perform_diarisation(self, source, num_speakers):
|
|
||||||
"""
|
|
||||||
Shortcut method for the Diarisation task.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: JSON output of diarisation result.
|
|
||||||
"""
|
|
||||||
kwargs = {
|
|
||||||
"num_speakers": num_speakers if num_speakers != 0 else None,
|
|
||||||
}
|
|
||||||
|
|
||||||
if isinstance(source, str):
|
|
||||||
try:
|
|
||||||
result = self.model.diarization(source, **kwargs)
|
|
||||||
except ValueError:
|
|
||||||
raise gr.Error("Couldn't detect any speech in the provided audio. \
|
|
||||||
Please try again!")
|
|
||||||
|
|
||||||
return json.dumps(result, indent=2)
|
|
||||||
elif isinstance(source, list):
|
|
||||||
source_names = [s.split("/")[-1] for s in source]
|
|
||||||
result = []
|
|
||||||
for s in tqdm(source, total=len(source),desc = "Performing diarisation"):
|
|
||||||
try:
|
|
||||||
res = self.model.diarization(s, **kwargs)
|
|
||||||
except ValueError:
|
|
||||||
res = f"NO DIARISATION FOUND FOR {s}"
|
|
||||||
gr.Warning(f"Couldn't detect any speech in {s} will skip this file.")
|
|
||||||
result.append(res)
|
|
||||||
|
|
||||||
out = {}
|
|
||||||
|
|
||||||
for i, res in enumerate(result):
|
|
||||||
out[source_names[i]] = res
|
|
||||||
|
|
||||||
return json.dumps(out, indent=4)
|
|
||||||
|
|
||||||
else:
|
|
||||||
gr.Error("Please provide a valid audio file.")
|
|
||||||
|
|
||||||
|
|
||||||
####
|
|
||||||
# Gradio Interface
|
|
||||||
####
|
|
||||||
|
|
||||||
def gradio_Interface(model : Scraibe = None):
|
|
||||||
|
|
||||||
if model is None:
|
|
||||||
model = Scraibe()
|
|
||||||
|
|
||||||
pipe = GradioTranscriptionInterface(model)
|
|
||||||
|
|
||||||
def select_task(choice):
|
|
||||||
if choice == 'Auto Transcribe':
|
|
||||||
|
|
||||||
return (gr.update(visible = True),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = True))
|
|
||||||
|
|
||||||
|
|
||||||
elif choice == 'Transcribe':
|
|
||||||
|
|
||||||
return (gr.update(visible = False),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = True))
|
|
||||||
|
|
||||||
|
|
||||||
elif choice == 'Diarisation':
|
|
||||||
|
|
||||||
return (gr.update(visible = True),
|
|
||||||
gr.update(visible = False),
|
|
||||||
gr.update(visible = False))
|
|
||||||
|
|
||||||
def select_origin(choice):
|
|
||||||
if choice == "Upload Audio":
|
|
||||||
|
|
||||||
return (gr.update(visible = True),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None))
|
|
||||||
|
|
||||||
elif choice == "Record Audio":
|
|
||||||
|
|
||||||
return (gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None))
|
|
||||||
|
|
||||||
elif choice == "Upload Video":
|
|
||||||
|
|
||||||
return (gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None))
|
|
||||||
|
|
||||||
elif choice == "Record Video":
|
|
||||||
|
|
||||||
return (gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = False, value = None))
|
|
||||||
|
|
||||||
elif choice == "File or Files":
|
|
||||||
|
|
||||||
return (gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = False, value = None),
|
|
||||||
gr.update(visible = True))
|
|
||||||
|
|
||||||
def run_scribe(task,
|
|
||||||
num_speakers,
|
|
||||||
translate,
|
|
||||||
language,
|
|
||||||
audio1,
|
|
||||||
audio2,
|
|
||||||
video1,
|
|
||||||
video2,
|
|
||||||
file_in,
|
|
||||||
progress = gr.Progress(track_tqdm= True)):
|
|
||||||
# get *args which are not None
|
|
||||||
progress(0, desc='Starting task...')
|
|
||||||
source = audio1 or audio2 or video1 or video2 or file_in
|
|
||||||
|
|
||||||
if isinstance(source, list):
|
|
||||||
source = [s.name for s in source]
|
|
||||||
if len(source) == 1:
|
|
||||||
source = source[0]
|
|
||||||
|
|
||||||
if task == 'Auto Transcribe':
|
|
||||||
|
|
||||||
out_str , out_json = pipe.auto_transcribe(source = source,
|
|
||||||
num_speakers = num_speakers,
|
|
||||||
translation = translate,
|
|
||||||
language = language)
|
|
||||||
|
|
||||||
if isinstance(source, str):
|
|
||||||
return (gr.update(value = out_str, visible = True),
|
|
||||||
gr.update(value = out_json, visible = True),
|
|
||||||
gr.update(visible = True),
|
|
||||||
gr.update(visible = True))
|
|
||||||
else:
|
|
||||||
return (gr.update(value = out_str, visible = True),
|
|
||||||
gr.update(value = out_json, visible = True),
|
|
||||||
gr.update(visible = False),
|
|
||||||
gr.update(visible = False))
|
|
||||||
|
|
||||||
elif task == 'Transcribe':
|
|
||||||
|
|
||||||
out = pipe.transcribe(source = source,
|
|
||||||
translation = translate,
|
|
||||||
language = language)
|
|
||||||
|
|
||||||
return (gr.update(value = out, visible = True),
|
|
||||||
gr.update(value = None, visible = False),
|
|
||||||
gr.update(visible = False),
|
|
||||||
gr.update(visible = False))
|
|
||||||
|
|
||||||
elif task == 'Diarisation':
|
|
||||||
|
|
||||||
out = pipe.perform_diarisation(source = source,
|
|
||||||
num_speakers = num_speakers)
|
|
||||||
|
|
||||||
return (gr.update(value = None, visible = False),
|
|
||||||
gr.update(value = out, visible = True),
|
|
||||||
gr.update(visible = False),
|
|
||||||
gr.update(visible = False))
|
|
||||||
|
|
||||||
def annotate_output(annoation : str, out_json : dict):
|
|
||||||
# get *args which are not None
|
|
||||||
|
|
||||||
trans = Transcript.from_json(out_json)
|
|
||||||
trans = trans.annotate(*annoation.split(","))
|
|
||||||
|
|
||||||
return gr.update(value = str(trans)),gr.update(value = trans.get_json())
|
|
||||||
|
|
||||||
|
|
||||||
with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:
|
|
||||||
|
|
||||||
# Define components
|
|
||||||
hname = os.path.join(CURRENT_PATH, "header.html")
|
|
||||||
header = open(hname, "r").read()
|
|
||||||
|
|
||||||
# ugly hack to get the logo to work
|
|
||||||
header = header.replace("/file=logo.svg", f"/file={CURRENT_PATH}/logo.svg" )
|
|
||||||
|
|
||||||
gr.HTML(header, visible= True, show_label=False)
|
|
||||||
|
|
||||||
with gr.Row():
|
|
||||||
|
|
||||||
with gr.Column():
|
|
||||||
|
|
||||||
task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
|
|
||||||
value= 'Auto Transcribe')
|
|
||||||
|
|
||||||
num_speakers = gr.Number(value=0, label= "Number of speakers (optional)",
|
|
||||||
info = "Number of speakers in the audio file. If you don't know,\
|
|
||||||
leave it at 0.", visible= True)
|
|
||||||
|
|
||||||
translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
|
|
||||||
info="Select 'Yes' to have the output translated into English.",
|
|
||||||
visible= True)
|
|
||||||
|
|
||||||
language = gr.Dropdown(LANGUAGES,
|
|
||||||
label="Language (optional)", value = "None",
|
|
||||||
info="Language of the audio file. If you don't know,\
|
|
||||||
leave it at None.", visible= True)
|
|
||||||
|
|
||||||
input = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video"
|
|
||||||
,"File or Files"], label="Input Type", value="Upload Audio")
|
|
||||||
|
|
||||||
audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
|
|
||||||
interactive= True, visible= True)
|
|
||||||
audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
|
|
||||||
interactive= True, visible= False)
|
|
||||||
video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
|
|
||||||
interactive= True, visible= False)
|
|
||||||
video2 = gr.Video(source="webcam", label="Record Video", type="filepath",include_audio= True,
|
|
||||||
interactive= True, visible= False)
|
|
||||||
file_in = gr.Files(label="Upload File or Files", interactive= True, visible= False)
|
|
||||||
|
|
||||||
submit = gr.Button()
|
|
||||||
|
|
||||||
with gr.Column():
|
|
||||||
|
|
||||||
out_txt = gr.Textbox(label="Output",
|
|
||||||
visible= True, show_copy_button=True)
|
|
||||||
|
|
||||||
out_json = gr.JSON(label="JSON Output",
|
|
||||||
visible= False, show_copy_button=True)
|
|
||||||
|
|
||||||
annoation = gr.Textbox(label="Name your speaker's",
|
|
||||||
info= "Please provide a list of the speakers arranged \
|
|
||||||
in the order in which they appear in the input. Use comma ',' \
|
|
||||||
as a seperator. Be aware that the first name is given \
|
|
||||||
to SPEAKER_00 the second to SPEAKER_01 and so on.",
|
|
||||||
visible= False, interactive= True)
|
|
||||||
|
|
||||||
annotate = gr.Button(value="Annotate", visible= False, interactive= True)
|
|
||||||
|
|
||||||
# Define usage of components
|
|
||||||
input.change(fn=select_origin, inputs=[input],
|
|
||||||
outputs=[audio1, audio2, video1, video2, file_in])
|
|
||||||
|
|
||||||
task.change(fn=select_task, inputs=[task],
|
|
||||||
outputs=[num_speakers, translate, language])
|
|
||||||
|
|
||||||
translate.change(fn= lambda x : gr.update(value = x),
|
|
||||||
inputs=[translate], outputs=[translate])
|
|
||||||
num_speakers.change(fn= lambda x : gr.update(value = x),
|
|
||||||
inputs=[num_speakers], outputs=[num_speakers])
|
|
||||||
language.change(fn= lambda x : gr.update(value = x),
|
|
||||||
inputs=[language], outputs=[language])
|
|
||||||
|
|
||||||
submit.click(fn = run_scribe,
|
|
||||||
inputs=[task, num_speakers, translate, language, audio1,
|
|
||||||
audio2, video1, video2, file_in],
|
|
||||||
outputs=[out_txt, out_json, annoation, annotate])
|
|
||||||
|
|
||||||
annotate.click(fn = annotate_output, inputs=[annoation, out_json],
|
|
||||||
outputs=[out_txt, out_json])
|
|
||||||
|
|
||||||
return demo
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
|
|
||||||
gradio_Interface().queue().launch()
|
|
||||||
@@ -1,66 +0,0 @@
|
|||||||
<!-- Importing Cormorant Garamond font from Google Fonts -->
|
|
||||||
<link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@400;700&display=swap" rel="stylesheet">
|
|
||||||
|
|
||||||
<style>
|
|
||||||
.header-container {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
justify-content: center;
|
|
||||||
position: relative;
|
|
||||||
padding-top: 30px;
|
|
||||||
}
|
|
||||||
.logo-container {
|
|
||||||
position: absolute;
|
|
||||||
top: 50%;
|
|
||||||
right: 20px;
|
|
||||||
transform: translateY(-50%);
|
|
||||||
width: 300px;
|
|
||||||
}
|
|
||||||
.logo {
|
|
||||||
width: 100%;
|
|
||||||
height: auto;
|
|
||||||
}
|
|
||||||
h1 {
|
|
||||||
font-family: 'Cormorant Garamond', serif;
|
|
||||||
font-size: 50px !important; /* Increased font size */
|
|
||||||
font-weight: bold;
|
|
||||||
color: #50AF31;
|
|
||||||
margin: 0;
|
|
||||||
position: relative;
|
|
||||||
padding: 0.5em 0;
|
|
||||||
}
|
|
||||||
h1::before, h1::after {
|
|
||||||
content: "";
|
|
||||||
position: absolute;
|
|
||||||
height: 2px;
|
|
||||||
width: 80%;
|
|
||||||
background-color: #50AF31;
|
|
||||||
left: 10%;
|
|
||||||
}
|
|
||||||
h1::before {
|
|
||||||
top: 0.5em;
|
|
||||||
}
|
|
||||||
h1::after {
|
|
||||||
bottom: 0.5em;
|
|
||||||
}
|
|
||||||
p, h2 {
|
|
||||||
font-size: 16px;
|
|
||||||
margin: 10px 0;
|
|
||||||
line-height: 1.4;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
|
|
||||||
<div class="header-container">
|
|
||||||
<h1>ScrAIbe</h1>
|
|
||||||
<div class="logo-container">
|
|
||||||
<a href="https://www.kida-bmel.de/"> <!-- Replace with your actual URL -->
|
|
||||||
<img src="/file=logo.svg" alt="KIDA Logo" class="logo">
|
|
||||||
</a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div style="text-align: center; padding: 20px 10%;">
|
|
||||||
<p>
|
|
||||||
Upload, record, or provide a video with audio for transcription. Our toolkit is designed to transcribe content from multiple languages accurately. The integrated speaker diarisation feature identifies different speakers, ensuring a smooth transcription experience. For optimal results, indicate the number of speakers and the original language of the content.
|
|
||||||
</p>
|
|
||||||
<h2 style="font-weight: bold; color: #50AF31;">What would you like to do next?</h2>
|
|
||||||
</div>
|
|
||||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 29 KiB |
@@ -1,319 +0,0 @@
|
|||||||
"""
|
|
||||||
This file contains a modified version of qtfaststart by qtfaststart
|
|
||||||
https://github.com/danielgtaylor/qtfaststart/tree/master
|
|
||||||
|
|
||||||
All credit goes to the original author.
|
|
||||||
Copyright (C) 2008 - 2013 Daniel G. Taylor <dan@programmer-art.org>
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this
|
|
||||||
software and associated documentation files (the "Software"),
|
|
||||||
to deal in the Software without restriction, including without limitation the rights to
|
|
||||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
|
|
||||||
Software, and to permit persons to whom the Software is furnished to do so,
|
|
||||||
subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all copies
|
|
||||||
or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
||||||
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
||||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
||||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
||||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
||||||
IN THE SOFTWARE.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import struct
|
|
||||||
import collections
|
|
||||||
import io
|
|
||||||
|
|
||||||
# define error classes
|
|
||||||
class FastStartException(Exception):
|
|
||||||
"""
|
|
||||||
Raised when something bad happens during processing.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class FastStartSetupError(FastStartException):
|
|
||||||
"""
|
|
||||||
Rasised when asked to process a file that does not need processing
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class MalformedFileError(FastStartException):
|
|
||||||
"""
|
|
||||||
Raised when the input file is setup in an unexpected way
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
class UnsupportedFormatError(FastStartException):
|
|
||||||
"""
|
|
||||||
Raised when a movie file is recognized as a format not supported.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
# define constants
|
|
||||||
CHUNK_SIZE = 8192
|
|
||||||
|
|
||||||
log = logging.getLogger("qtfaststart")
|
|
||||||
|
|
||||||
# Older versions of Python require this to be defined
|
|
||||||
if not hasattr(os, 'SEEK_CUR'):
|
|
||||||
os.SEEK_CUR = 1
|
|
||||||
|
|
||||||
Atom = collections.namedtuple('Atom', 'name position size')
|
|
||||||
|
|
||||||
def read_atom(datastream):
|
|
||||||
"""
|
|
||||||
Read an atom and return a tuple of (size, type) where size is the size
|
|
||||||
in bytes (including the 8 bytes already read) and type is a "fourcc"
|
|
||||||
like "ftyp" or "moov".
|
|
||||||
"""
|
|
||||||
size, type = struct.unpack(">L4s", datastream.read(8))
|
|
||||||
type = type.decode('ascii')
|
|
||||||
return size, type
|
|
||||||
|
|
||||||
|
|
||||||
def _read_atom_ex(datastream):
|
|
||||||
"""
|
|
||||||
Read an Atom from datastream
|
|
||||||
"""
|
|
||||||
pos = datastream.tell()
|
|
||||||
atom_size, atom_type = read_atom(datastream)
|
|
||||||
if atom_size == 1:
|
|
||||||
atom_size, = struct.unpack(">Q", datastream.read(8))
|
|
||||||
return Atom(atom_type, pos, atom_size)
|
|
||||||
|
|
||||||
|
|
||||||
def get_index(datastream):
|
|
||||||
"""
|
|
||||||
Return an index of top level atoms, their absolute byte-position in the
|
|
||||||
file and their size in a list:
|
|
||||||
|
|
||||||
index = [
|
|
||||||
("ftyp", 0, 24),
|
|
||||||
("moov", 25, 2658),
|
|
||||||
("free", 2683, 8),
|
|
||||||
...
|
|
||||||
]
|
|
||||||
|
|
||||||
The tuple elements will be in the order that they appear in the file.
|
|
||||||
"""
|
|
||||||
log.debug("Getting index of top level atoms...")
|
|
||||||
|
|
||||||
index = list(_read_atoms(datastream))
|
|
||||||
_ensure_valid_index(index)
|
|
||||||
|
|
||||||
return index
|
|
||||||
|
|
||||||
|
|
||||||
def _read_atoms(datastream):
|
|
||||||
"""
|
|
||||||
Read atoms until an error occurs
|
|
||||||
"""
|
|
||||||
while datastream:
|
|
||||||
try:
|
|
||||||
atom = _read_atom_ex(datastream)
|
|
||||||
log.debug("%s: %s" % (atom.name, atom.size))
|
|
||||||
except:
|
|
||||||
break
|
|
||||||
|
|
||||||
yield atom
|
|
||||||
|
|
||||||
if atom.size == 0:
|
|
||||||
if atom.name == "mdat":
|
|
||||||
# Some files may end in mdat with no size set, which generally
|
|
||||||
# means to seek to the end of the file. We can just stop indexing
|
|
||||||
# as no more entries will be found!
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Weird, but just continue to try to find more atoms
|
|
||||||
continue
|
|
||||||
|
|
||||||
datastream.seek(atom.position + atom.size)
|
|
||||||
|
|
||||||
|
|
||||||
def _ensure_valid_index(index):
|
|
||||||
"""
|
|
||||||
Ensure the minimum viable atoms are present in the index.
|
|
||||||
|
|
||||||
Raise FastStartException if not.
|
|
||||||
"""
|
|
||||||
top_level_atoms = set([item.name for item in index])
|
|
||||||
for key in ["moov", "mdat"]:
|
|
||||||
if key not in top_level_atoms:
|
|
||||||
log.error("%s atom not found, is this a valid MOV/MP4 file?" % key)
|
|
||||||
raise FastStartException()
|
|
||||||
|
|
||||||
|
|
||||||
def find_atoms(size, datastream):
|
|
||||||
"""
|
|
||||||
Compatibilty interface for _find_atoms_ex
|
|
||||||
"""
|
|
||||||
fake_parent = Atom('fake', datastream.tell()-8, size+8)
|
|
||||||
for atom in _find_atoms_ex(fake_parent, datastream):
|
|
||||||
yield atom.name
|
|
||||||
|
|
||||||
|
|
||||||
def _find_atoms_ex(parent_atom, datastream):
|
|
||||||
"""
|
|
||||||
Yield either "stco" or "co64" Atoms from datastream.
|
|
||||||
datastream will be 8 bytes into the stco or co64 atom when the value
|
|
||||||
is yielded.
|
|
||||||
|
|
||||||
It is assumed that datastream will be at the end of the atom after
|
|
||||||
the value has been yielded and processed.
|
|
||||||
|
|
||||||
parent_atom is the parent atom, a 'moov' or other ancestor of CO
|
|
||||||
atoms in the datastream.
|
|
||||||
"""
|
|
||||||
stop = parent_atom.position + parent_atom.size
|
|
||||||
|
|
||||||
while datastream.tell() < stop:
|
|
||||||
try:
|
|
||||||
atom = _read_atom_ex(datastream)
|
|
||||||
except:
|
|
||||||
log.exception("Error reading next atom!")
|
|
||||||
raise FastStartException()
|
|
||||||
|
|
||||||
if atom.name in ["trak", "mdia", "minf", "stbl"]:
|
|
||||||
# Known ancestor atom of stco or co64, search within it!
|
|
||||||
for res in _find_atoms_ex(atom, datastream):
|
|
||||||
yield res
|
|
||||||
elif atom.name in ["stco", "co64"]:
|
|
||||||
yield atom
|
|
||||||
else:
|
|
||||||
# Ignore this atom, seek to the end of it.
|
|
||||||
datastream.seek(atom.position + atom.size)
|
|
||||||
|
|
||||||
|
|
||||||
def process(infilename, limit=float('inf')):
|
|
||||||
"""
|
|
||||||
Convert a Quicktime/MP4 file for streaming by moving the metadata to
|
|
||||||
the front of the file. This method writes a new file.
|
|
||||||
|
|
||||||
If limit is set to something other than zero it will be used as the
|
|
||||||
number of bytes to write of the atoms following the moov atom. This
|
|
||||||
is very useful to create a small sample of a file with full headers,
|
|
||||||
which can then be used in bug reports and such.
|
|
||||||
"""
|
|
||||||
if isinstance(infilename, str):
|
|
||||||
datastream = open(infilename, "rb")
|
|
||||||
elif isinstance(infilename, bytes):
|
|
||||||
datastream = io.BytesIO(infilename)
|
|
||||||
else:
|
|
||||||
raise TypeError("infilename must be a filename, bytes or file-like object")
|
|
||||||
# Get the top level atom index
|
|
||||||
index = get_index(datastream)
|
|
||||||
|
|
||||||
mdat_pos = 999999
|
|
||||||
free_size = 0
|
|
||||||
|
|
||||||
# Make sure moov occurs AFTER mdat, otherwise no need to run!
|
|
||||||
for atom in index:
|
|
||||||
# The atoms are guaranteed to exist from get_index above!
|
|
||||||
if atom.name == "moov":
|
|
||||||
moov_atom = atom
|
|
||||||
moov_pos = atom.position
|
|
||||||
elif atom.name == "mdat":
|
|
||||||
mdat_pos = atom.position
|
|
||||||
elif atom.name == "free" and atom.position < mdat_pos:
|
|
||||||
# This free atom is before the mdat!
|
|
||||||
free_size += atom.size
|
|
||||||
log.info("Removing free atom at %d (%d bytes)" % (atom.position, atom.size))
|
|
||||||
elif atom.name == "\x00\x00\x00\x00" and atom.position < mdat_pos:
|
|
||||||
# This is some strange zero atom with incorrect size
|
|
||||||
free_size += 8
|
|
||||||
log.info("Removing strange zero atom at %s (8 bytes)" % atom.position)
|
|
||||||
|
|
||||||
# Offset to shift positions
|
|
||||||
offset = moov_atom.size - free_size
|
|
||||||
|
|
||||||
if moov_pos < mdat_pos:
|
|
||||||
# moov appears to be in the proper place, don't shift by moov size
|
|
||||||
offset -= moov_atom.size
|
|
||||||
if not free_size:
|
|
||||||
# No free atoms and moov is correct, we are done!
|
|
||||||
log.error("This file appears to already be setup for streaming!")
|
|
||||||
# Stupid hack to retrun the non-processed file:
|
|
||||||
if isinstance(infilename, str):
|
|
||||||
return open(infilename, "rb").read()
|
|
||||||
elif isinstance(infilename, bytes):
|
|
||||||
return io.BytesIO(infilename).read()
|
|
||||||
|
|
||||||
# Read and fix moov
|
|
||||||
moov = _patch_moov(datastream, moov_atom, offset)
|
|
||||||
|
|
||||||
log.info("Writing output...")
|
|
||||||
outfile = b''
|
|
||||||
|
|
||||||
# Write ftype
|
|
||||||
for atom in index:
|
|
||||||
if atom.name == "ftyp":
|
|
||||||
log.debug("Writing ftyp... (%d bytes)" % atom.size)
|
|
||||||
datastream.seek(atom.position)
|
|
||||||
outfile += datastream.read(atom.size)
|
|
||||||
|
|
||||||
# Write moov
|
|
||||||
_bytes = moov.getvalue()
|
|
||||||
log.debug("Writing moov... (%d bytes)" % len(_bytes))
|
|
||||||
outfile += _bytes
|
|
||||||
|
|
||||||
# Write the rest
|
|
||||||
atoms = [item for item in index if item.name not in ["ftyp", "moov", "free"]]
|
|
||||||
for atom in atoms:
|
|
||||||
log.debug("Writing %s... (%d bytes)" % (atom.name, atom.size))
|
|
||||||
datastream.seek(atom.position)
|
|
||||||
|
|
||||||
# for compatability, allow '0' to mean no limit
|
|
||||||
cur_limit = limit or float('inf')
|
|
||||||
cur_limit = min(cur_limit, atom.size)
|
|
||||||
|
|
||||||
for chunk in get_chunks(datastream, CHUNK_SIZE, cur_limit):
|
|
||||||
outfile += chunk
|
|
||||||
|
|
||||||
return outfile
|
|
||||||
|
|
||||||
|
|
||||||
def _patch_moov(datastream, atom, offset):
|
|
||||||
datastream.seek(atom.position)
|
|
||||||
moov = io.BytesIO(datastream.read(atom.size))
|
|
||||||
|
|
||||||
# reload the atom from the fixed stream
|
|
||||||
atom = _read_atom_ex(moov)
|
|
||||||
|
|
||||||
for atom in _find_atoms_ex(atom, moov):
|
|
||||||
# Read either 32-bit or 64-bit offsets
|
|
||||||
ctype, csize = dict(
|
|
||||||
stco=('L', 4),
|
|
||||||
co64=('Q', 8),
|
|
||||||
)[atom.name]
|
|
||||||
|
|
||||||
# Get number of entries
|
|
||||||
version, entry_count = struct.unpack(">2L", moov.read(8))
|
|
||||||
|
|
||||||
log.info("Patching %s with %d entries" % (atom.name, entry_count))
|
|
||||||
|
|
||||||
entries_pos = moov.tell()
|
|
||||||
|
|
||||||
struct_fmt = ">%(entry_count)s%(ctype)s" % vars()
|
|
||||||
|
|
||||||
# Read entries
|
|
||||||
entries = struct.unpack(struct_fmt, moov.read(csize * entry_count))
|
|
||||||
|
|
||||||
# Patch and write entries
|
|
||||||
offset_entries = [entry + offset for entry in entries]
|
|
||||||
moov.seek(entries_pos)
|
|
||||||
moov.write(struct.pack(struct_fmt, *offset_entries))
|
|
||||||
return moov
|
|
||||||
|
|
||||||
def get_chunks(stream, chunk_size, limit):
|
|
||||||
remaining = limit
|
|
||||||
while remaining:
|
|
||||||
chunk = stream.read(min(remaining, chunk_size))
|
|
||||||
if not chunk:
|
|
||||||
return
|
|
||||||
remaining -= len(chunk)
|
|
||||||
yield chunk
|
|
||||||
@@ -75,6 +75,11 @@ class Scraibe:
|
|||||||
Path to pyannote diarization model or model itself.
|
Path to pyannote diarization model or model itself.
|
||||||
**kwargs: Additional keyword arguments for whisper
|
**kwargs: Additional keyword arguments for whisper
|
||||||
and pyannote diarization models.
|
and pyannote diarization models.
|
||||||
|
e.g.:
|
||||||
|
|
||||||
|
- verbose: If True, the class will print additional information.
|
||||||
|
- save_kwargs: If True, the keyword arguments will be saved
|
||||||
|
for autotranscribe. So you can unload the class and reload it again.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
@@ -98,6 +103,15 @@ class Scraibe:
|
|||||||
else:
|
else:
|
||||||
self.verbose = False
|
self.verbose = False
|
||||||
|
|
||||||
|
# Save kwargs for autotranscribe if you want to unload the class and load it again.
|
||||||
|
if kwargs.get('save_setup'):
|
||||||
|
self.params = dict(whisper_model = whisper_model,
|
||||||
|
dia_model = dia_model,
|
||||||
|
**kwargs)
|
||||||
|
else:
|
||||||
|
self.params = {}
|
||||||
|
|
||||||
|
|
||||||
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
def autotranscribe(self, audio_file : Union[str, torch.Tensor, ndarray],
|
||||||
remove_original : bool = False,
|
remove_original : bool = False,
|
||||||
**kwargs) -> Transcript:
|
**kwargs) -> Transcript:
|
||||||
|
|||||||
+29
-10
@@ -9,7 +9,8 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from .autotranscript import Scraibe
|
from .autotranscript import Scraibe
|
||||||
from .app.gradio_app import gradio_Interface
|
from .misc import ParseKwargs
|
||||||
|
|
||||||
|
|
||||||
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
from whisper.tokenizer import LANGUAGES , TO_LANGUAGE_CODE
|
||||||
from torch.cuda import is_available
|
from torch.cuda import is_available
|
||||||
@@ -41,13 +42,15 @@ def cli():
|
|||||||
help="List of audio files to transcribe.")
|
help="List of audio files to transcribe.")
|
||||||
|
|
||||||
group.add_argument('--start-server', action='store_true',
|
group.add_argument('--start-server', action='store_true',
|
||||||
help='Start the Gradio app.')
|
help='Start the Gradio app.' \
|
||||||
|
'If set, all other arguments are ignored' \
|
||||||
|
'besides --server-config or --server-kwargs.')
|
||||||
|
|
||||||
parser.add_argument("--port", type=int, default= None,
|
parser.add_argument("--server-config", type=str, default= None,
|
||||||
help="Port to run the Gradio app on. Defaults to 7860.")
|
help="Path to the configy.yml file.")
|
||||||
|
|
||||||
parser.add_argument("--server-name", type=str, default= None,
|
parser.add_argument('--server-kwargs', nargs='*', action=ParseKwargs, default={},
|
||||||
help="Name of the Gradio app. If empty 127.0.0.1 or 0.0.0.0 will be used.")
|
help='Keyword arguments for the Gradio app.')
|
||||||
|
|
||||||
parser.add_argument("--whisper-model-name", default="medium",
|
parser.add_argument("--whisper-model-name", default="medium",
|
||||||
help="Name of the Whisper model to use.")
|
help="Name of the Whisper model to use.")
|
||||||
@@ -66,7 +69,8 @@ def cli():
|
|||||||
help="Device to use for PyTorch inference.")
|
help="Device to use for PyTorch inference.")
|
||||||
|
|
||||||
parser.add_argument("--num-threads", type=int, default=0,
|
parser.add_argument("--num-threads", type=int, default=0,
|
||||||
help="Number of threads used by torch for CPU inference; overrides MKL_NUM_THREADS/OMP_NUM_THREADS.")
|
help="Number of threads used by torch for CPU inference; '\
|
||||||
|
'overrides MKL_NUM_THREADS/OMP_NUM_THREADS.")
|
||||||
|
|
||||||
parser.add_argument("--output-directory", "-o", type=str, default=".",
|
parser.add_argument("--output-directory", "-o", type=str, default=".",
|
||||||
help="Directory to save the transcription outputs.")
|
help="Directory to save the transcription outputs.")
|
||||||
@@ -113,8 +117,9 @@ def cli():
|
|||||||
if arg_dict["whisper_model_directory"]:
|
if arg_dict["whisper_model_directory"]:
|
||||||
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
|
class_kwargs["download_root"] = arg_dict.pop("whisper_model_directory")
|
||||||
|
|
||||||
model = Scraibe(**class_kwargs)
|
if not start_server:
|
||||||
|
|
||||||
|
model = Scraibe(**class_kwargs)
|
||||||
|
|
||||||
if arg_dict["audio_files"]:
|
if arg_dict["audio_files"]:
|
||||||
audio_files = arg_dict.pop("audio_files")
|
audio_files = arg_dict.pop("audio_files")
|
||||||
@@ -158,10 +163,24 @@ def cli():
|
|||||||
f.write(out)
|
f.write(out)
|
||||||
|
|
||||||
|
|
||||||
if start_server: # unfinished code
|
else: # unfinished code
|
||||||
|
raise NotImplementedError("Currently not Working")
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
gradio_Interface(model).queue().launch(server_port=args.port, server_name=args.server_name)
|
execute_path = os.path.join(os.path.dirname(__file__), "app/app_starter.py")
|
||||||
|
|
||||||
|
config = arg_dict.pop("server_config")
|
||||||
|
server_kwargs = arg_dict.pop("server_kwargs")
|
||||||
|
|
||||||
|
if not config:
|
||||||
|
subprocess.run([sys.executable, execute_path, f"--server-kwargs={server_kwargs}"])
|
||||||
|
elif not server_kwargs:
|
||||||
|
subprocess.run([sys.executable, execute_path, f"--server-config={config}"])
|
||||||
|
elif not config and not server_kwargs:
|
||||||
|
subprocess.run([sys.executable, execute_path])
|
||||||
|
else:
|
||||||
|
subprocess.run([sys.executable, execute_path, f"--server-config={config}", f"--server-kwargs={server_kwargs}"])
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cli()
|
cli()
|
||||||
@@ -27,7 +27,9 @@ Usage:
|
|||||||
diarisation_output = model.diarization("path/to/audiofile.wav")
|
diarisation_output = model.diarization("path/to/audiofile.wav")
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import warnings
|
||||||
import os
|
import os
|
||||||
|
import yaml
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TypeVar, Union
|
from typing import TypeVar, Union
|
||||||
|
|
||||||
@@ -216,6 +218,41 @@ class Diariser:
|
|||||||
if not os.path.exists(model) and use_auth_token is None:
|
if not os.path.exists(model) and use_auth_token is None:
|
||||||
use_auth_token = cls._get_token()
|
use_auth_token = cls._get_token()
|
||||||
|
|
||||||
|
elif os.path.exists(model) and not use_auth_token:
|
||||||
|
# check if model can be found locally nearby the config file
|
||||||
|
with open(model, 'r') as file:
|
||||||
|
config = yaml.safe_load(file)
|
||||||
|
|
||||||
|
path_to_model = config['pipeline']['params']['segmentation']
|
||||||
|
|
||||||
|
if not os.path.exists(path_to_model):
|
||||||
|
warnings.warn(f"Model not found at {path_to_model}. "\
|
||||||
|
"Trying to find it nearby the config file.")
|
||||||
|
|
||||||
|
pwd = model.split("/")[:-1]
|
||||||
|
pwd = "/".join(pwd)
|
||||||
|
|
||||||
|
path_to_model = os.path.join(pwd, "pytorch_model.bin")
|
||||||
|
|
||||||
|
if not os.path.exists(path_to_model):
|
||||||
|
warnings.warn(f"Model not found at {path_to_model}. \
|
||||||
|
'Trying to find it nearby .bin files instead.")
|
||||||
|
# list elementes with the ending .bin
|
||||||
|
bin_files = [f for f in os.listdir(pwd) if f.endswith(".bin")]
|
||||||
|
if len(bin_files) == 1:
|
||||||
|
path_to_model = os.path.join(pwd, bin_files[0])
|
||||||
|
else:
|
||||||
|
warnings.warn("Found more than one .bin file. "\
|
||||||
|
"or none. Please specify the path to the model " \
|
||||||
|
"or setup a huggingface token.")
|
||||||
|
|
||||||
|
warnings.warn(f"Found model at {path_to_model} overwriting config file.")
|
||||||
|
|
||||||
|
config['pipeline']['params']['segmentation'] = path_to_model
|
||||||
|
|
||||||
|
with open(model, 'w') as file:
|
||||||
|
yaml.dump(config, file)
|
||||||
|
|
||||||
_model = Pipeline.from_pretrained(model,
|
_model = Pipeline.from_pretrained(model,
|
||||||
use_auth_token = use_auth_token,
|
use_auth_token = use_auth_token,
|
||||||
cache_dir = cache_dir,
|
cache_dir = cache_dir,
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
from pyannote.audio.core.model import CACHE_DIR as PYANNOTE_CACHE_DIR
|
from pyannote.audio.core.model import CACHE_DIR as PYANNOTE_CACHE_DIR
|
||||||
|
from argparse import Action
|
||||||
|
|
||||||
CACHE_DIR = os.getenv(
|
CACHE_DIR = os.getenv(
|
||||||
"AUTOT_CACHE",
|
"AUTOT_CACHE",
|
||||||
@@ -40,3 +41,17 @@ def config_diarization_yaml(file_path: str, path_to_segmentation: str = None) ->
|
|||||||
|
|
||||||
with open(file_path, "w") as stream:
|
with open(file_path, "w") as stream:
|
||||||
yaml.dump(yml, stream)
|
yaml.dump(yml, stream)
|
||||||
|
|
||||||
|
class ParseKwargs(Action):
|
||||||
|
"""
|
||||||
|
Custom argparse action to parse keyword arguments.
|
||||||
|
"""
|
||||||
|
def __call__(self, parser, namespace, values, option_string=None):
|
||||||
|
setattr(namespace, self.dest, dict())
|
||||||
|
for value in values:
|
||||||
|
key, value = value.split('=')
|
||||||
|
try:
|
||||||
|
value = eval(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
getattr(namespace, self.dest)[key] = value
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
from calendar import c
|
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
import os
|
import os
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
@@ -21,6 +20,8 @@ with open(verfile, "r") as fp:
|
|||||||
|
|
||||||
build_version = "SCRAIBE_BUILD" in os.environ
|
build_version = "SCRAIBE_BUILD" in os.environ
|
||||||
|
|
||||||
|
version["ISRELEASED"] = True if "ISRELEASED" in os.environ else False
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
@@ -53,7 +54,7 @@ if __name__ == "__main__":
|
|||||||
keywords = ['transcription', 'speech recognition', 'whisper', 'pyannote', 'audio', 'ScrAIbe', 'scraibe',
|
keywords = ['transcription', 'speech recognition', 'whisper', 'pyannote', 'audio', 'ScrAIbe', 'scraibe',
|
||||||
'speech-to-text', 'speech-to-text transcription', 'speech-to-text recognition',
|
'speech-to-text', 'speech-to-text transcription', 'speech-to-text recognition',
|
||||||
'voice-to-speech'],
|
'voice-to-speech'],
|
||||||
package_data={'scraibe.app' : ["*.html", "*.svg"]},
|
package_data={'scraibe.app' : ["*.html", "*.svg","*.yml"]},
|
||||||
entry_points={'console_scripts':
|
entry_points={'console_scripts':
|
||||||
['scraibe = scraibe.cli:cli']}
|
['scraibe = scraibe.cli:cli']}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user