renamed module

This commit is contained in:
Jaikinator
2023-09-18 15:29:09 +02:00
parent e76b7b51a5
commit 5385e266cc
21 changed files with 399 additions and 86 deletions
+3 -3
View File
@@ -1,12 +1,12 @@
from dash import Dash, dcc, html, dash_table, Input, Output, State, callback from dash import Dash, dcc, html, dash_table, Input, Output, State, callback
import base64 import base64
from autotranscript.app.qtfaststart import process from scraibe.app.qtfaststart import process
from autotranscript import AutoTranscribe from scraibe import AutoTranscribe
import io import io
import subprocess as sp import subprocess as sp
import numpy as np import numpy as np
from autotranscript.audio import SAMPLE_RATE from scraibe.audio import SAMPLE_RATE
# Setup auto-transcript # Setup auto-transcript
autot = AutoTranscribe() # whisper_model="tiny", whisper_kwargs={"local" : False} autot = AutoTranscribe() # whisper_model="tiny", whisper_kwargs={"local" : False}
+317
View File
@@ -0,0 +1,317 @@
"""
Gradio Audio Transcription App.
--------------------------------
This module provides an interface to transcribe audio files using the
AutoTranscribe model. Users can either upload an audio file or record their speech
live for transcription. The application supports multiple languages and provides
options to specify the number of speakers and the language of the audio.
Attributes:
LANGUAGES (list): A list of supported languages for transcription.
Usage:
Run this script to start the Gradio web interface for audio transcription.
"""
import json
import gradio as gr
from scraibe import AutoTranscribe, Transcript
# Colour palette handed to gr.Blocks further down in this module.
theme = gr.themes.Soft(primary_hue="green", secondary_hue="orange", neutral_hue="gray")
# Languages offered in the UI's language dropdown. The order is the display
# order; entries are grouped here by initial letter purely for readability.
LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani",
    "Belarusian", "Bosnian", "Bulgarian",
    "Catalan", "Chinese", "Croatian", "Czech",
    "Danish", "Dutch",
    "English", "Estonian",
    "Finnish", "French",
    "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian",
    "Icelandic", "Indonesian", "Italian",
    "Japanese",
    "Kannada", "Kazakh", "Korean",
    "Latvian", "Lithuanian",
    "Macedonian", "Malay", "Marathi", "Maori",
    "Nepali", "Norwegian",
    "Persian", "Polish", "Portuguese",
    "Romanian", "Russian",
    "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili", "Swedish",
    "Tagalog", "Tamil", "Thai", "Turkish",
    "Ukrainian", "Urdu",
    "Vietnamese",
    "Welsh",
]
class GradioTranscriptionInterface:
    """
    Interface handling the interaction between Gradio UI and the Audio Transcription system.

    Each public method turns raw widget values into keyword arguments for the
    wrapped model and converts the result into something Gradio can display.
    """

    def __init__(self, model: "AutoTranscribe | None" = None):
        """
        Initializes the GradioTranscriptionInterface with a transcription model.

        Args:
            model (AutoTranscribe, optional): Model responsible for audio
                transcription tasks. When omitted, a default
                ``AutoTranscribe()`` is created here.
        """
        # Create the default lazily. Writing ``AutoTranscribe()`` as the
        # default value in the signature would evaluate it once, when the
        # class is defined, even if the caller supplies a model.
        self.model = AutoTranscribe() if model is None else model

    @staticmethod
    def _speaker_count(num_speakers):
        """Map the UI value to the model argument: 0/None -> None, else an int."""
        if not num_speakers:
            return None
        # Gradio's Number widget can hand over a float (e.g. 2.0).
        return int(num_speakers)

    @staticmethod
    def _language(language):
        """Map the UI placeholder string "None" to a real ``None``."""
        return language if language != "None" else None

    def auto_transcribe(self, source,
                        num_speakers : int,
                        translation : bool,
                        language : str):
        """
        Shortcut method for the AutoTranscribe task.

        Args:
            source: Audio/video input forwarded unchanged to the model.
            num_speakers (int): Number of speakers; 0 means "not specified".
            translation (bool): If truthy, the model task is 'translate'.
            language (str): Language name, or the string "None" for
                "not specified".

        Returns:
            tuple: Transcribed text (str), JSON output (dict)

        Raises:
            gr.Error: If the model raises ``ValueError`` for the input.
        """
        kwargs = {
            "num_speakers": self._speaker_count(num_speakers),
            "language": self._language(language),
            "task": 'translate' if translation else None
        }

        try:
            result = self.model.autotranscribe(source, **kwargs)
        except ValueError as err:
            raise gr.Error("Couldn't detect any speech in the provided audio. "
                           "Please try again!") from err

        return str(result), result.get_json()

    def transcribe(self, source, translation, language):
        """
        Shortcut method for the Transcribe task.

        Args:
            source: Audio/video input forwarded unchanged to the model.
            translation: ``True`` (checkbox) or the legacy string "Yes" to
                request the 'translate' task; anything else disables it.
            language (str): Language name, or the string "None" for
                "not specified".

        Returns:
            str: Transcribed text.
        """
        # The UI passes a checkbox boolean; only comparing against "Yes"
        # meant translation could never be switched on from the checkbox.
        wants_translation = translation is True or translation == "Yes"
        kwargs = {
            "language": self._language(language),
            "task": 'translate' if wants_translation else None
        }

        result = self.model.transcribe(source, **kwargs)

        return str(result)

    def perform_diarisation(self, source, num_speakers):
        """
        Shortcut method for the Diarisation task.

        Args:
            source: Audio/video input forwarded unchanged to the model.
            num_speakers: Number of speakers; 0 means "not specified".

        Returns:
            str: JSON output of diarisation result.

        Raises:
            gr.Error: If the model raises ``ValueError`` for the input.
        """
        kwargs = {
            "num_speakers": self._speaker_count(num_speakers),
        }

        try:
            result = self.model.diarization(source, **kwargs)
        except ValueError as err:
            raise gr.Error("Couldn't detect any speech in the provided audio. "
                           "Please try again!") from err

        return json.dumps(result, indent=2)
####
# Gradio Interface
####
# Module-level interface instance used by run_scribe; it is constructed with
# the default model as soon as this module is executed.
pipe = GradioTranscriptionInterface()
def select_task(choice):
    """
    Build visibility updates for the task-dependent option widgets.

    Args:
        choice (str): Selected task label.

    Returns:
        tuple | None: Three ``gr.update`` objects (in the order the caller
        wires them to its outputs), or ``None`` for an unrecognised label.
    """
    # One visibility flag per output widget, keyed by task label.
    visibility_by_task = {
        'Auto Transcribe': (True, True, True),
        'Transcribe': (False, True, True),
        'Diarisation': (True, False, False),
    }

    flags = visibility_by_task.get(choice)
    if flags is None:
        return None

    return tuple(gr.update(visible = shown) for shown in flags)
def select_origin(choice):
    """
    Show the input widget matching the chosen origin and reset the others.

    Args:
        choice (str): Selected input-type label.

    Returns:
        tuple | None: Five ``gr.update`` objects, one per input widget in
        the order of the labels below. The selected one is made visible;
        every other one is hidden and has its value cleared. ``None`` is
        returned for an unrecognised label.
    """
    origins = ("Upload Audio", "Record Audio", "Upload Video", "Record Video", "File")

    if choice not in origins:
        return None

    return tuple(
        gr.update(visible = True) if origin == choice
        else gr.update(visible = False, value = None)
        for origin in origins
    )
def run_scribe(task, num_speakers, translate, language, audio1, audio2, video1, video2, file_in, progress = gr.Progress(track_tqdm= True)):
    """
    Run the selected task on whichever input widget currently holds a value.

    Args:
        task (str): 'Auto Transcribe', 'Transcribe' or 'Diarisation'.
        num_speakers: Number of speakers (0 means "not specified").
        translate: Whether to translate the output.
        language (str): Language name or "None".
        audio1, audio2, video1, video2, file_in: Values of the five input
            widgets; the first truthy one is used as the source.
        progress: Gradio progress tracker.

    Returns:
        tuple | None: Four ``gr.update`` objects for the text output, the
        JSON output, the speaker-name textbox and the annotate button.
        ``None`` if ``task`` is not one of the three known labels.

    Raises:
        gr.Error: If none of the input widgets holds a value.
    """
    progress(0, desc='Starting task...')

    # Pick the first input widget that actually carries a value.
    # NOTE(review): file_in may be a file wrapper rather than a path,
    # depending on the Gradio version - confirm the model accepts it.
    source = audio1 or audio2 or video1 or video2 or file_in

    if not source:
        # Without this guard ``None`` would be handed to the model.
        raise gr.Error("No input provided. Please upload or record a file first!")

    if task == 'Auto Transcribe':

        out_str , out_json = pipe.auto_transcribe(source = source,
                                                  num_speakers = num_speakers,
                                                  translation = translate,
                                                  language = language)

        # Text and JSON are shown together with the annotation controls.
        return (gr.update(value = out_str, visible = True),
                gr.update(value = out_json, visible = True),
                gr.update(visible = True),
                gr.update(visible = True))

    elif task == 'Transcribe':

        out = pipe.transcribe(source = source,
                              translation = translate,
                              language = language)

        # Text only; JSON output and annotation controls are hidden.
        return (gr.update(value = out, visible = True),
                gr.update(value = None, visible = False),
                gr.update(visible = False),
                gr.update(visible = False))

    elif task == 'Diarisation':

        out = pipe.perform_diarisation(source = source,
                                       num_speakers = num_speakers)

        # JSON only; text output and annotation controls are hidden.
        return (gr.update(value = None, visible = False),
                gr.update(value = out, visible = True),
                gr.update(visible = False),
                gr.update(visible = False))
def annotate_output(annoation : str, out_json : dict):
    """
    Rename the speakers of an existing transcript.

    Args:
        annoation (str): Comma-separated speaker names, in speaker order.
        out_json (dict): JSON form of the transcript to annotate.

    Returns:
        tuple: Two ``gr.update`` objects carrying the annotated transcript
        as text and as JSON.
    """
    # Strip surrounding whitespace so "Alice, Bob" does not produce a
    # speaker called " Bob"; positions are kept as entered.
    speaker_names = [name.strip() for name in annoation.split(",")]

    trans = Transcript.from_json(out_json)
    trans = trans.annotate(*speaker_names)

    return gr.update(value = str(trans)),gr.update(value = trans.get_json())
with gr.Blocks(theme=theme,title='ScrAIbe: Automatic Audio Transcription') as demo:

    # Define components

    # Read the banner markup through a context manager so the file handle is
    # closed. The path is resolved against the current working directory.
    with open("header.html", "r", encoding="utf-8") as header_file:
        header = header_file.read()

    gr.HTML(header, visible= True, show_label=False)

    with gr.Row():

        # Left column: task options and input widgets.
        with gr.Column():

            task = gr.Radio(["Auto Transcribe", "Transcribe", "Diarisation"], label="Task",
                            value= 'Auto Transcribe')

            num_speakers = gr.Number(value=0, label= "Number of speakers (optional)",
                                     info = "Number of speakers in the audio file. If you don't know, "
                                            "leave it at 0.", visible= True)

            # NOTE(review): `choices` is not a documented Checkbox argument;
            # kept as in the original - confirm it can be dropped.
            translate = gr.Checkbox(label="Translation", choices=[True, False], value = False,
                                    info="Select 'Yes' to have the output translated into English.",
                                    visible= True)

            # "None" is the default and what the hint tells users to keep, so
            # it has to be a selectable choice as well.
            language = gr.Dropdown(["None"] + LANGUAGES,
                                   label="Language (optional)", value = "None",
                                   info="Language of the audio file. If you don't know, "
                                        "leave it at None.", visible= True)

            # Named input_type rather than `input` to avoid shadowing the builtin.
            input_type = gr.Radio(["Upload Audio", "Record Audio", "Upload Video","Record Video"
                                   ,"File"], label="Input Type", value="Upload Audio")

            # Only the widget for the selected input type is visible.
            audio1 = gr.Audio(source="upload", type="filepath", label="Upload Audio",
                              interactive= True, visible= True)
            audio2 = gr.Audio(source="microphone", label="Record Audio", type="filepath",
                              interactive= True, visible= False)
            video1 = gr.Video(source="upload", type="filepath", label="Upload Video",
                              interactive= True, visible= False)
            video2 = gr.Video(source="webcam", label="Record Video", type="filepath",
                              interactive= True, visible= False)
            file_in = gr.File(label="Upload File", interactive= True, visible= False)

            submit = gr.Button()

        # Right column: results and speaker annotation.
        with gr.Column():

            out_txt = gr.Textbox(label="Output",
                                 visible= True, show_copy_button=True)
            out_json = gr.JSON(label="JSON Output",
                               visible= False, show_copy_button=True)

            annoation = gr.Textbox(label="Name your speaker's",
                                   info= "Please provide a list of the speakers arranged "
                                         "in the order in which they appear in the input. Use comma ',' "
                                         "as a seperator. Be aware that the first name is given "
                                         "to SPEAKER_00 the second to SPEAKER_01 and so on.",
                                   visible= False, interactive= True)

            annotate = gr.Button(value="Annotate", visible= False, interactive= True)

    # Define usage of components
    input_type.change(fn=select_origin, inputs=[input_type],
                      outputs=[audio1, audio2, video1, video2, file_in])

    task.change(fn=select_task, inputs=[task],
                outputs=[num_speakers, translate, language])

    # These handlers write each widget's own value back to itself.
    translate.change(fn= lambda x : gr.update(value = x),
                     inputs=[translate], outputs=[translate])
    num_speakers.change(fn= lambda x : gr.update(value = x),
                        inputs=[num_speakers], outputs=[num_speakers])
    language.change(fn= lambda x : gr.update(value = x),
                    inputs=[language], outputs=[language])

    submit.click(fn = run_scribe,
                 inputs=[task, num_speakers, translate, language, audio1,
                         audio2, video1, video2, file_in],
                 outputs=[out_txt, out_json, annoation, annotate])

    annotate.click(fn = annotate_output, inputs=[annoation, out_json],
                   outputs=[out_txt, out_json])

# Starts the web server as soon as this module is executed.
demo.queue().launch()
View File
+1
View File
@@ -0,0 +1 @@
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

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 38 KiB

@@ -35,7 +35,7 @@ Usage:
import json import json
import gradio as gr import gradio as gr
from autotranscript import AutoTranscribe, Transcript from scraibe import AutoTranscribe, Transcript
theme = gr.themes.Soft( theme = gr.themes.Soft(
@@ -126,7 +126,6 @@ class AutoTranscribe:
diarisation = self.diariser.diarization(dia_audio, **kwargs) diarisation = self.diariser.diarization(dia_audio, **kwargs)
if not diarisation["segments"]: if not diarisation["segments"]:
print("No segments found. Try to run transcription without diarisation.") print("No segments found. Try to run transcription without diarisation.")
@@ -145,8 +144,6 @@ class AutoTranscribe:
# Transcribe each segment and store the results # Transcribe each segment and store the results
final_transcript = dict() final_transcript = dict()
for i in trange(len(diarisation["segments"]), desc= "Transcribing"): for i in trange(len(diarisation["segments"]), desc= "Transcribing"):
seg = diarisation["segments"][i] seg = diarisation["segments"][i]
@@ -1,69 +1,69 @@
import os import os
import subprocess as sp import subprocess as sp
# Individual version components; VERSION joins them with dots.
MAJOR, MINOR, MICRO, MICRO_POST = 0, 1, 0, 0
# When False, get_version() appends a development suffix.
ISRELEASED = False
VERSION = '{:d}.{:d}.{:d}.{:d}'.format(MAJOR, MINOR, MICRO, MICRO_POST)
# Return the git revision as a string # Return the git revision as a string
# taken from numpy/numpy # taken from numpy/numpy
def git_version():
    """
    Return the hash of the current git HEAD as a string.

    Returns:
        str: Output of ``git rev-parse HEAD``, or "Unknown" when git cannot
        be started, exits with an error, or prints nothing usable.
    """
    def _minimal_ext_cmd(cmd):
        # construct minimal environment
        env = {k: os.environ[k] for k in ('SYSTEMROOT', 'PATH', 'HOME')
               if k in os.environ}
        # LANGUAGE is used on win32
        env['LANGUAGE'] = 'C'
        env['LANG'] = 'C'
        env['LC_ALL'] = 'C'
        proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, env=env)
        out = proc.communicate()[0]
        if proc.returncode != 0:
            # e.g. not inside a git repository; handled like a missing git
            raise OSError("command failed: " + " ".join(cmd))
        return out

    try:
        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
        revision = out.strip().decode('ascii')
    except (OSError, UnicodeDecodeError):
        revision = ""

    # An empty revision would make get_version() end in a bare "+".
    return revision or "Unknown"
def _get_git_version():
    """
    Run git_version() from the parent directory of this file's directory.

    Returns:
        str: Whatever git_version() returns.
    """
    cwd = os.getcwd()

    # go to the main directory
    fdir = os.path.dirname(os.path.abspath(__file__))
    maindir = os.path.abspath(os.path.join(fdir, ".."))
    os.chdir(maindir)

    try:
        # get git version
        return git_version()
    finally:
        # restore the cwd even if the lookup raised
        os.chdir(cwd)
def get_version(build_version=False):
    """
    Return the package version string.

    Args:
        build_version (bool): For unreleased versions, append a timestamp
            (``.devYYYYmmddHHMMSS``) instead of the git revision.

    Returns:
        str: ``VERSION`` when ``ISRELEASED`` is true; otherwise ``VERSION``
        followed by ``.dev<timestamp>`` or ``.dev0+<first 7 characters of
        the git revision>``.
    """
    if ISRELEASED:
        return VERSION

    # unreleased version
    if build_version:
        import datetime as dt
        date = dt.datetime.now().strftime("%Y%m%d%H%M%S")
        return VERSION + ".dev" + date

    # The git lookup (subprocess + chdir) only runs when its result is used.
    return VERSION + ".dev0+" + _get_git_version()[:7]
+3 -3
View File
@@ -2,7 +2,7 @@ import pkg_resources
import os import os
from setuptools import setup, find_packages from setuptools import setup, find_packages
module_name = "autotranscript" module_name = "scraibe"
github_url = "https://github.com/JSchmie/autotranscript" github_url = "https://github.com/JSchmie/autotranscript"
file_dir = os.path.dirname(os.path.realpath(__file__)) file_dir = os.path.dirname(os.path.realpath(__file__))
@@ -18,7 +18,7 @@ with open(verfile, "r") as fp:
############### setup ############### ############### setup ###############
build_version = "AUTOTRANSCRIPT_BUILD" in os.environ build_version = "SCRAIBE_BUILD" in os.environ
if __name__ == "__main__": if __name__ == "__main__":
@@ -42,5 +42,5 @@ if __name__ == "__main__":
description='Transcription tool for audio files based on Whisper and Pyannote', description='Transcription tool for audio files based on Whisper and Pyannote',
package_data={ "header" : ["app/header.html"], "logo" : ["app/Logo_KIDA_bmel_green.svg"]}, package_data={ "header" : ["app/header.html"], "logo" : ["app/Logo_KIDA_bmel_green.svg"]},
entry_points={'console_scripts': entry_points={'console_scripts':
['autotranscript = autotranscript.cli:cli']} ['scraibe = scraibe.cli:cli']}
) )
+3 -3
View File
@@ -1,5 +1,5 @@
import pytest import pytest
from autotranscript import Transcriber from scraibe import Transcriber
from unittest.mock import patch, mock_open from unittest.mock import patch, mock_open
import os import os
@@ -55,7 +55,7 @@ def test_save_transcript_to_file(transcriber):
# Test Diaraization class # Test Diaraization class
from autotranscript import Diariser from scraibe import Diariser
@pytest.fixture @pytest.fixture
def diarisation(): def diarisation():
@@ -83,7 +83,7 @@ def test_diarisation(diarisation):
# Test AudioProcessor # Test AudioProcessor
from autotranscript import AudioProcessor , TorchAudioProcessor from scraibe import AudioProcessor , TorchAudioProcessor
def test_AudioProcessor_init(): def test_AudioProcessor_init():
+2 -4
View File
@@ -18,16 +18,14 @@
# os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models") # os.environ['HF_HOME'] = os.path.expanduser("~/PycharmProjects/autotranscript/autotranscript/models")
from autotranscript import AutoTranscribe from scraibe import AutoTranscribe
model = AutoTranscribe() model = AutoTranscribe()
text = model.transcribe("test.mp4") text = model.autotranscribe('kida.mp4', num_speakers=2)
print("Transcription:\n") print("Transcription:\n")
print(text) print(text)
# from autotranscript.misc import * # from autotranscript.misc import *
# import os # import os