Implement async processing with Celery, Redis, and queue-based email notifications
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 14:38:10 +00:00
parent b9d25a39dd
commit 2803c81b44
5 changed files with 362 additions and 274 deletions
+73 -268
View File
@@ -1,26 +1,28 @@
"""
ScrAIbe Web GUI (Gradio)
------------------------
ScrAIbe Web GUI (Gradio) - Async Mode
-------------------------------------
Runs the Web GUI that:
- Accepts audio uploads
- Sends audio to LocalAI for transcription + diarization
- Optionally sends transcript to a second LLM for summarization
- Returns transcript (and summary) in the browser
- Optionally emails transcript files (TXT + JSON)
- Enqueues transcription jobs asynchronously via Celery
- Backend worker:
- Transcribes (with diarization)
- Optionally summarizes
- Emails the user:
- Immediately: confirmation + queue position
- On success: transcript + JSON (+ summary if requested)
- On error: error details
This is the default entrypoint when running in Docker.
"""
import os
import json
import logging
import tempfile
import shutil
from datetime import datetime
import gradio as gr
from .autotranscript import Scraibe
from .misc import setup_logging
logger = logging.getLogger("scraibe.webui")
@@ -45,7 +47,7 @@ def load_config():
def create_app():
"""
Create and launch the Gradio Web GUI.
Create and launch the Gradio Web GUI (async mode).
"""
# Logging
@@ -57,185 +59,11 @@ def create_app():
layout_cfg = config.get("layout", {})
launch_cfg = config.get("launch", {})
logger.info("Starting ScrAIbe Web GUI.")
logger.info("Starting ScrAIbe Web GUI (async mode).")
# Initialize Scraibe (LocalAI-backed)
# If LocalAI is unreachable at startup, still launch the UI
# and let individual transcription calls fail with a clear message.
scraibe = None
try:
scraibe = Scraibe(verbose=True)
except Exception as e:
logger.warning(
"Failed to initialize Scraibe at startup (LocalAI may be down): %s. "
"Web GUI will start; transcription will fail until LocalAI is reachable.",
e,
)
# Helper: run transcription via LocalAI API
def run_transcribe(
    audio_path,
    task,
    language,
    num_speakers,
    send_email_flag,
    email_to,
    email_cc,
    email_subject,
):
    """Transcribe an audio file, export the result to files, optionally email them.

    Args:
        audio_path: Path of the audio file to process.
        task: ``"transcript_and_summarize"`` to also produce a summary; any
            other value runs a plain transcription.
        language: Language passed to the backend; an empty value becomes ``None``.
        num_speakers: Speaker count; converted with ``int()`` when truthy,
            otherwise passed as ``None``.
        send_email_flag: When truthy, the TXT and JSON exports are emailed.
        email_to: Recipient string; email is skipped when blank.
        email_cc: Optional CC string; blank becomes ``None``.
        email_subject: Optional subject; a dated default is used when blank.

    Returns:
        A 5-tuple ``(transcript_text, summary_text, file_path, status_msg,
        email_status)``. ``file_path`` is the Markdown export when summarizing
        and the TXT export otherwise. When transcription or export fails the
        tuple is ``("", "", None, "Error: ...", "")``.

    Raises:
        ValueError: If ``audio_path`` is empty.
    """
    if not audio_path:
        raise ValueError("No audio file provided.")

    def _write_temp(suffix, write):
        """Create a temp file with *suffix*, fill it via *write(f)*, return its path."""
        # mkstemp creates the file atomically; tempfile.mktemp only returns a
        # name and leaves a window in which another process can claim it.
        fd, path = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            write(f)
        return path

    with_summary = task == "transcript_and_summarize"
    email_status = ""
    attachments = []
    summary_text = ""
    md_path = None

    try:
        if scraibe is None:
            # Scraibe initialization is allowed to fail at startup; report
            # that plainly instead of surfacing an AttributeError on None.
            raise RuntimeError(
                "Transcription backend is unavailable "
                "(Scraibe failed to initialize at startup)."
            )

        backend_call = (
            scraibe.transcript_and_summarize if with_summary else scraibe.transcribe
        )
        # for_export=True requests the rich result (segments / raw_result)
        # needed for the JSON export.
        result = backend_call(
            audio_file=audio_path,
            language=language or None,
            num_speakers=int(num_speakers) if num_speakers else None,
            verbose=True,
            for_export=True,
        )

        transcript_text = result.get("transcript", "")
        segments = result.get("segments", [])
        raw_result = result.get("raw_result")

        if with_summary:
            summary_text = result.get("summary", "")

            def _write_markdown(f):
                f.write("# Transcript\n\n")
                f.write(transcript_text)
                f.write("\n\n# Summary\n\n")
                f.write(summary_text)

            # Markdown export (transcript + summary) is the download file
            # offered in the UI for this task.
            md_path = _write_temp(".md", _write_markdown)

        # Plain-text transcript.
        txt_path = _write_temp(".txt", lambda f: f.write(transcript_text))

        # JSON export; key order matches the previous output.
        json_data = {
            "task": "transcript_and_summarize" if with_summary else "transcribe",
            "transcript": transcript_text,
        }
        if with_summary:
            json_data["summary"] = summary_text
        json_data["segments"] = segments
        json_data["metadata"] = {"timestamp": datetime.utcnow().isoformat()}
        if raw_result is not None:
            json_data["raw_result"] = raw_result
        json_path = _write_temp(
            ".json",
            lambda f: json.dump(json_data, f, indent=2, ensure_ascii=False),
        )

        if send_email_flag:
            attachments = [txt_path, json_path]

        status_msg = (
            "Transcription and summarization completed."
            if with_summary
            else "Transcription completed."
        )
    except Exception as e:
        # UI boundary: report the failure in the status field rather than
        # letting it propagate; logger.exception keeps the traceback.
        logger.exception("Error during transcription: %s", e)
        return (
            "",
            "",
            None,
            f"Error: {e}",
            "",
        )

    # Email is attempted only after a successful transcription, and a mail
    # failure never discards the transcript that was just produced.
    if send_email_flag and attachments:
        try:
            from .email_sender import send_email
        except ImportError:
            email_status = "Email feature unavailable (email_sender not found)."
        else:
            to = (email_to or "").strip()
            cc = (email_cc or "").strip()
            subject = (email_subject or "").strip()
            if not to:
                email_status = "Email not sent: 'To' address is empty."
            else:
                if not subject:
                    subject = (
                        "ScrAIbe Transcript - "
                        f"{datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}"
                    )
                body = (
                    "Please find the transcription files attached.\n"
                    "This message was generated by ScrAIbe.\n"
                )
                try:
                    send_email(
                        to=to,
                        cc=cc or None,
                        subject=subject,
                        body=body,
                        attachments=attachments,
                    )
                    email_status = "Transcript files sent via email."
                except Exception as e:
                    # Best effort: any mail error is reported in the
                    # email status field instead of being raised.
                    email_status = f"Email failed: {e}"

    return (
        transcript_text,
        summary_text if with_summary else "",
        md_path if with_summary else txt_path,
        status_msg,
        email_status,
    )
# Ensure upload directory exists
upload_dir = os.getenv("SCRAIBE_UPLOAD_DIR", "/tmp/scraibe_uploads")
os.makedirs(upload_dir, exist_ok=True)
# Load header/footer HTML if present
header_path = layout_cfg.get("header", "/app/src/misc/header.html")
@@ -253,7 +81,6 @@ def create_app():
footer_html = f.read()
# Build Gradio interface
# In Gradio 6.0+, css must be passed to launch(), not Blocks()
with gr.Blocks(
title="A.P.Strom Transcription",
) as app:
@@ -289,57 +116,25 @@ def create_app():
precision=0,
)
# Email options
send_email_checkbox = gr.Checkbox(
label="Send transcript files via email"
# Email is required in async mode
email_to = gr.Textbox(
label="Your email address (required)",
placeholder="e.g. your.name@example.com",
)
with gr.Group(visible=False) as email_group:
email_to = gr.Textbox(
label="To (comma-separated)",
placeholder="e.g. name@example.com",
)
email_cc = gr.Textbox(
label="CC (optional, comma-separated)",
placeholder="e.g. manager@example.com",
)
email_subject = gr.Textbox(
label="Subject (optional)",
placeholder="Default: ScrAIbe Transcript - <date>",
)
send_email_checkbox.change(
fn=lambda v: gr.update(visible=v),
inputs=[send_email_checkbox],
outputs=[email_group],
email_cc = gr.Textbox(
label="CC (optional, comma-separated)",
placeholder="e.g. manager@example.com",
)
transcribe_btn = gr.Button("Start", variant="primary")
submit_btn = gr.Button("Submit for transcription", variant="primary")
with gr.Column(scale=3):
output_text = gr.Textbox(
label="Transcript",
lines=10,
interactive=False,
)
summary_text = gr.Textbox(
label="Summary",
lines=10,
interactive=False,
visible=False,
)
file_output = gr.File(
label="Download transcript/summary",
)
status_text = gr.Textbox(
label="Status",
lines=6,
interactive=False,
)
email_status_text = gr.Textbox(
label="Email status",
interactive=False,
visible=True,
)
# Footer
if footer_html:
@@ -348,73 +143,83 @@ def create_app():
# Events
def on_task_change(value):
show_summary = (value == "transcript_and_summarize")
return gr.update(visible=show_summary)
# No special UI changes needed; both modes handled in backend
return
task_choice.change(
fn=on_task_change,
inputs=[task_choice],
outputs=[summary_text],
outputs=[],
)
def on_transcribe(
def on_submit(
audio,
task,
language,
num_speakers,
send_email_flag,
email_to_val,
email_cc_val,
email_subject_val,
):
if not audio:
return "Please upload or record audio."
email_to_val = (email_to_val or "").strip()
if not email_to_val:
return "Please enter your email address."
# Copy uploaded file to a stable location
try:
ext = os.path.splitext(audio)[1] or ".wav"
ts = datetime.utcnow().strftime("%Y%m%d%H%M%S%f")
new_name = f"upload_{ts}{ext}"
dest_path = os.path.join(upload_dir, new_name)
shutil.copy2(audio, dest_path)
except Exception as e:
logger.error("Error copying uploaded file: %s", e)
return f"Error saving your file: {e}"
# Import Celery task
try:
from .tasks import process_transcription_task
except ImportError:
return (
"",
"",
None,
"Please upload or record audio.",
"",
"Error: async processing is not available (Celery not configured)."
)
transcript, summary, file_path, status_msg, email_status = run_transcribe(
audio_path=audio,
task=task,
language=language,
num_speakers=num_speakers,
send_email_flag=bool(send_email_flag),
email_to=email_to_val,
email_cc=email_cc_val,
email_subject=email_subject_val,
)
# Enqueue transcription job
try:
task_result = process_transcription_task.delay(
audio_path=dest_path,
task_type=task,
language=language or None,
num_speakers=int(num_speakers) if num_speakers else None,
email_to=email_to_val,
email_cc=email_cc_val or None,
include_summary=(task == "transcript_and_summarize"),
)
except Exception as e:
logger.error("Error enqueuing job: %s", e)
return f"Error submitting your file: {e}"
show_summary = bool(summary)
return (
transcript,
summary,
file_path if file_path else None,
status_msg,
email_status,
"Your audio file has been received and added to the queue.\n"
"We have sent a confirmation email to you.\n"
"You will receive another email with your transcript (and summary, if requested) "
"once processing is complete.\n"
f"Job ID: {task_result.id}"
)
transcribe_btn.click(
fn=on_transcribe,
submit_btn.click(
fn=on_submit,
inputs=[
audio_input,
task_choice,
language_input,
num_speakers_input,
send_email_checkbox,
email_to,
email_cc,
email_subject,
],
outputs=[
output_text,
summary_text,
file_output,
status_text,
email_status_text,
],
outputs=[status_text],
)
# Launch options