Use verbose_json diarization, add JSON+TXT email feature
This commit is contained in:
+78
-17
@@ -17,7 +17,7 @@ but ignored when not relevant.
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional, Dict, Any
|
||||||
|
|
||||||
from .localai_client import LocalAIClient, LocalAIError
|
from .localai_client import LocalAIClient, LocalAIError
|
||||||
from .summarizer import SummarizerClient, SummarizerError
|
from .summarizer import SummarizerClient, SummarizerError
|
||||||
@@ -120,21 +120,21 @@ class Scraibe:
|
|||||||
|
|
||||||
def transcribe(
|
def transcribe(
|
||||||
self,
|
self,
|
||||||
audio_file: Union[str],
|
audio_file: str,
|
||||||
|
*,
|
||||||
|
for_export: bool = False,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> str:
|
) -> Union[str, Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Transcribe the provided audio file using LocalAI.
|
Transcribe the provided audio file using LocalAI.
|
||||||
|
|
||||||
Uses /v1/audio/diarization with vibevoice.cpp, then concatenates
|
Uses /v1/audio/diarization with vibevoice.cpp (verbose_json).
|
||||||
all segment texts.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
audio_file (str): Path to the audio file.
|
|
||||||
**kwargs: Additional keyword arguments (some forwarded, others ignored).
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The concatenated transcribed text.
|
- If for_export=False: plain transcript text (str).
|
||||||
|
- If for_export=True: dict with:
|
||||||
|
- transcript: plain text
|
||||||
|
- segments: list[segment] with speaker labels
|
||||||
|
- raw_result: full verbose_json from LocalAI (if present)
|
||||||
"""
|
"""
|
||||||
if isinstance(audio_file, str):
|
if isinstance(audio_file, str):
|
||||||
if not os.path.exists(audio_file):
|
if not os.path.exists(audio_file):
|
||||||
@@ -152,31 +152,70 @@ class Scraibe:
|
|||||||
audio_path=audio_file,
|
audio_path=audio_file,
|
||||||
include_text=True,
|
include_text=True,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
|
return_raw=True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
except LocalAIError as e:
|
except LocalAIError as e:
|
||||||
logger.error("Error during LocalAI transcription: %s", e)
|
logger.error("Error during LocalAI transcription: %s", e)
|
||||||
raise LocalAIError(f"Error during LocalAI transcription: {e}")
|
raise LocalAIError(f"Error during LocalAI transcription: {e}")
|
||||||
|
|
||||||
|
segments = result.get("segments", [])
|
||||||
|
speakers = result.get("speakers", [])
|
||||||
transcripts = result.get("transcripts", [])
|
transcripts = result.get("transcripts", [])
|
||||||
text = " ".join(t.strip() for t in transcripts if t.strip())
|
|
||||||
logger.info("transcribe completed, length=%d chars", len(text))
|
# Build simple transcript text
|
||||||
return text
|
if for_export:
|
||||||
|
# Include speaker-labeled transcript
|
||||||
|
lines = []
|
||||||
|
for seg, speaker, text in zip(segments, speakers, transcripts):
|
||||||
|
start, end = seg
|
||||||
|
ts = self._format_timestamp(start)
|
||||||
|
line = f"[{ts}] {speaker}: {text.strip()}"
|
||||||
|
lines.append(line)
|
||||||
|
full_text = "\n\n".join(lines)
|
||||||
|
else:
|
||||||
|
# Legacy: space-joined text
|
||||||
|
full_text = " ".join(t.strip() for t in transcripts if t.strip())
|
||||||
|
|
||||||
|
logger.info("transcribe completed, length=%d chars", len(full_text))
|
||||||
|
|
||||||
|
if for_export:
|
||||||
|
# Return richer structure for JSON export
|
||||||
|
raw_result = result.get("raw_result")
|
||||||
|
return {
|
||||||
|
"transcript": full_text,
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"id": i,
|
||||||
|
"speaker": sp,
|
||||||
|
"start": seg[0],
|
||||||
|
"end": seg[1],
|
||||||
|
"text": txt,
|
||||||
|
}
|
||||||
|
for i, (seg, sp, txt) in enumerate(
|
||||||
|
zip(segments, speakers, transcripts)
|
||||||
|
)
|
||||||
|
],
|
||||||
|
"raw_result": raw_result if raw_result is not None else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
return full_text
|
||||||
|
|
||||||
def transcript_and_summarize(
|
def transcript_and_summarize(
|
||||||
self,
|
self,
|
||||||
audio_file: Union[str],
|
audio_file: str,
|
||||||
*,
|
*,
|
||||||
summarizer_api_url: Optional[str] = None,
|
summarizer_api_url: Optional[str] = None,
|
||||||
summarizer_api_key: Optional[str] = None,
|
summarizer_api_key: Optional[str] = None,
|
||||||
summarizer_model: Optional[str] = None,
|
summarizer_model: Optional[str] = None,
|
||||||
|
for_export: bool = False,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Transcribe the audio file and generate a detailed summary.
|
Transcribe the audio file and generate a detailed summary.
|
||||||
|
|
||||||
Steps:
|
Steps:
|
||||||
- Transcribe via LocalAI.
|
- Transcribe via LocalAI (verbose_json).
|
||||||
- Build a plain-text transcript (with speaker labels).
|
- Build a plain-text transcript (with speaker labels).
|
||||||
- Summarize the transcript using the configured LLM.
|
- Summarize the transcript using the configured LLM.
|
||||||
|
|
||||||
@@ -184,6 +223,8 @@ class Scraibe:
|
|||||||
dict with:
|
dict with:
|
||||||
- transcript: full transcript text (with speaker labels)
|
- transcript: full transcript text (with speaker labels)
|
||||||
- summary: final detailed summary (markdown-ready)
|
- summary: final detailed summary (markdown-ready)
|
||||||
|
- segments: (if for_export) list[segment] with speaker labels
|
||||||
|
- raw_result: (if for_export) full verbose_json from LocalAI
|
||||||
"""
|
"""
|
||||||
if isinstance(audio_file, str):
|
if isinstance(audio_file, str):
|
||||||
if not os.path.exists(audio_file):
|
if not os.path.exists(audio_file):
|
||||||
@@ -202,6 +243,7 @@ class Scraibe:
|
|||||||
audio_path=audio_file,
|
audio_path=audio_file,
|
||||||
include_text=True,
|
include_text=True,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
|
return_raw=True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
except LocalAIError as e:
|
except LocalAIError as e:
|
||||||
@@ -249,11 +291,30 @@ class Scraibe:
|
|||||||
|
|
||||||
logger.info("transcript_and_summarize completed.")
|
logger.info("transcript_and_summarize completed.")
|
||||||
|
|
||||||
return {
|
out = {
|
||||||
"transcript": full_transcript,
|
"transcript": full_transcript,
|
||||||
"summary": summary,
|
"summary": summary,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if for_export:
|
||||||
|
# Add segments and raw_result for JSON export
|
||||||
|
raw_result = result.get("raw_result")
|
||||||
|
out["segments"] = [
|
||||||
|
{
|
||||||
|
"id": i,
|
||||||
|
"speaker": sp,
|
||||||
|
"start": seg[0],
|
||||||
|
"end": seg[1],
|
||||||
|
"text": txt,
|
||||||
|
}
|
||||||
|
for i, (seg, sp, txt) in enumerate(
|
||||||
|
zip(segments, speakers, transcripts)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
out["raw_result"] = raw_result if raw_result is not None else None
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
# -----------------
|
# -----------------
|
||||||
# Helpers
|
# Helpers
|
||||||
# -----------------
|
# -----------------
|
||||||
|
|||||||
@@ -0,0 +1,147 @@
|
|||||||
|
"""
|
||||||
|
Email sender module for ScrAIbe.
|
||||||
|
|
||||||
|
Sends transcription outputs (TXT, JSON, etc.) via SMTP.
|
||||||
|
All credentials are configured via environment variables.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import smtplib
|
||||||
|
import logging
|
||||||
|
from email import encoders
|
||||||
|
from email.mime.base import MIMEBase
|
||||||
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
from email.mime.text import MIMEText
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger("scraibe.email_sender")
|
||||||
|
|
||||||
|
|
||||||
|
class EmailError(Exception):
    """Raised when the email configuration is invalid or a message cannot be sent."""


def get_email_config():
    """
    Read email configuration from environment variables.

    Variables read:
        EMAIL_SMTP_HOST, EMAIL_SMTP_PORT, EMAIL_SMTP_USER,
        EMAIL_SMTP_PASSWORD, EMAIL_FROM_ADDRESS (all required) and
        EMAIL_SMTP_USE_TLS (optional, defaults to "true").

    Returns:
        dict with keys smtp_host, smtp_port (int), smtp_user,
        smtp_password, from_address and use_tls (bool).

    Raises:
        EmailError: if a required variable is missing/empty, or if
            EMAIL_SMTP_PORT is not a valid integer.
    """
    smtp_host = os.getenv("EMAIL_SMTP_HOST")
    smtp_port = os.getenv("EMAIL_SMTP_PORT")
    smtp_user = os.getenv("EMAIL_SMTP_USER")
    smtp_password = os.getenv("EMAIL_SMTP_PASSWORD")
    from_address = os.getenv("EMAIL_FROM_ADDRESS")

    # TLS is opt-out: anything other than an explicit "false"/"0"/"no"
    # (case-insensitive, surrounding whitespace ignored) keeps it enabled.
    use_tls_str = os.getenv("EMAIL_SMTP_USE_TLS", "true").strip().lower()
    use_tls = use_tls_str not in ("false", "0", "no")

    if not all([smtp_host, smtp_port, smtp_user, smtp_password, from_address]):
        raise EmailError(
            "Email configuration incomplete. "
            "Ensure EMAIL_SMTP_HOST, EMAIL_SMTP_PORT, EMAIL_SMTP_USER, "
            "EMAIL_SMTP_PASSWORD, and EMAIL_FROM_ADDRESS are set."
        )

    # A malformed port is a configuration problem; report it through the
    # same exception type as the other configuration errors so callers
    # only need to handle EmailError.
    try:
        port = int(smtp_port)
    except ValueError as e:
        raise EmailError(
            f"EMAIL_SMTP_PORT must be an integer, got {smtp_port!r}."
        ) from e

    return {
        "smtp_host": smtp_host,
        "smtp_port": port,
        "smtp_user": smtp_user,
        "smtp_password": smtp_password,
        "from_address": from_address,
        "use_tls": use_tls,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def send_email(
    to: str,
    subject: str,
    body: str,
    attachments: List[str],
    cc: Optional[str] = None,
) -> bool:
    """
    Send an email with optional file attachments.

    Args:
        to: Comma-separated list of recipient email addresses.
        subject: Email subject.
        body: Email body (plain text).
        attachments: List of file paths to attach. Paths that do not exist
            or cannot be read are skipped with a warning; None is treated
            as an empty list.
        cc: Comma-separated list of CC email addresses (optional).

    Returns:
        True if sent successfully.

    Raises:
        EmailError: if the configuration is incomplete, no valid 'To'
            address is given, or sending fails.
    """
    try:
        cfg = get_email_config()
    except EmailError as e:
        logger.error("Email configuration error: %s", e)
        raise

    # Parse recipients; empty/None input yields an empty list.
    to_list = [addr.strip() for addr in (to or "").split(",") if addr.strip()]
    cc_list = [addr.strip() for addr in (cc or "").split(",") if addr.strip()]

    if not to_list:
        raise EmailError("No valid 'To' email addresses provided.")

    # Build message
    msg = MIMEMultipart()
    msg["From"] = cfg["from_address"]
    msg["To"] = ", ".join(to_list)
    if cc_list:
        msg["Cc"] = ", ".join(cc_list)
    msg["Subject"] = subject

    msg.attach(MIMEText(body, "plain"))

    # Attach files (best effort: a bad attachment never blocks the email).
    for file_path in attachments or []:
        if not os.path.isfile(file_path):
            logger.warning("Attachment file not found, skipping: %s", file_path)
            continue

        try:
            with open(file_path, "rb") as f:
                payload = f.read()
        except OSError as e:
            logger.warning("Failed to attach file %s: %s", file_path, e)
            continue

        part = MIMEBase("application", "octet-stream")
        part.set_payload(payload)
        encoders.encode_base64(part)
        part.add_header(
            "Content-Disposition",
            "attachment",
            filename=os.path.basename(file_path),
        )
        msg.attach(part)

    # Connect and send. The context manager issues QUIT and closes the
    # socket even when STARTTLS, login or sendmail raises, so a failed
    # attempt does not leak the connection.
    try:
        with smtplib.SMTP(
            cfg["smtp_host"], cfg["smtp_port"], timeout=30
        ) as server:
            server.ehlo()
            if cfg["use_tls"]:
                server.starttls()
                # Re-identify over the now-encrypted channel.
                server.ehlo()

            server.login(cfg["smtp_user"], cfg["smtp_password"])
            server.sendmail(
                cfg["from_address"],
                to_list + cc_list,
                msg.as_string(),
            )
    except Exception as e:
        # Boundary handler: callers are promised EmailError for any failure.
        logger.error("Failed to send email: %s", e)
        raise EmailError(f"Failed to send email: {e}") from e

    logger.info(
        "Email sent to %s (CC: %s)",
        to_list,
        cc_list or "None",
    )
    return True
|
||||||
+16
-16
@@ -39,7 +39,7 @@ class LocalAIClient:
|
|||||||
Responsibilities:
|
Responsibilities:
|
||||||
- Read configuration from environment.
|
- Read configuration from environment.
|
||||||
- Upload audio file as multipart/form-data.
|
- Upload audio file as multipart/form-data.
|
||||||
- Parse diarization + transcription response.
|
- Parse diarization + transcription response (verbose_json).
|
||||||
- Map response into the same structure expected by Scraibe's Transcript.
|
- Map response into the same structure expected by Scraibe's Transcript.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -106,20 +106,13 @@ class LocalAIClient:
|
|||||||
response_format: Optional[str] = None,
|
response_format: Optional[str] = None,
|
||||||
include_text: Optional[bool] = None,
|
include_text: Optional[bool] = None,
|
||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
|
return_raw: bool = False,
|
||||||
**_ignored,
|
**_ignored,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Send audio to LocalAI /v1/audio/diarization and return a dict
|
Send audio to LocalAI /v1/audio/diarization and return:
|
||||||
in the same style as the previous internal diarization output:
|
- A normalized dict with segments, speakers, transcripts.
|
||||||
|
- Optionally, the raw verbose_json response (for JSON export).
|
||||||
{
|
|
||||||
"segments": [ [start, end], ... ],
|
|
||||||
"speakers": [ "SPEAKER_00", ... ],
|
|
||||||
"transcripts": [ "text for segment", ... ]
|
|
||||||
}
|
|
||||||
|
|
||||||
Extra kwargs that the old UI used (e.g., whisper-specific) are
|
|
||||||
accepted but ignored.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
audio_path: Path to the audio file.
|
audio_path: Path to the audio file.
|
||||||
@@ -131,16 +124,18 @@ class LocalAIClient:
|
|||||||
min_duration_on: Optional min segment duration.
|
min_duration_on: Optional min segment duration.
|
||||||
min_duration_off: Optional min gap duration.
|
min_duration_off: Optional min gap duration.
|
||||||
response_format: "json", "verbose_json", or "rttm".
|
response_format: "json", "verbose_json", or "rttm".
|
||||||
Defaults to "verbose_json" if not set.
|
Defaults to "verbose_json".
|
||||||
include_text: Whether to request per-segment text.
|
include_text: Whether to request per-segment text.
|
||||||
Defaults to True.
|
Defaults to True.
|
||||||
verbose: If True, prints progress messages.
|
verbose: If True, prints progress messages.
|
||||||
|
return_raw: If True, also return the raw API response in 'raw_result'.
|
||||||
"""
|
"""
|
||||||
if verbose:
|
if verbose:
|
||||||
print("Starting diarization and transcription via LocalAI.")
|
print("Starting diarization and transcription via LocalAI.")
|
||||||
|
|
||||||
logger.info("diarize_and_transcribe requested for: %s", audio_path)
|
logger.info("diarize_and_transcribe requested for: %s", audio_path)
|
||||||
|
|
||||||
|
# Always use verbose_json for diarization + speaker info
|
||||||
if response_format is None:
|
if response_format is None:
|
||||||
response_format = "verbose_json"
|
response_format = "verbose_json"
|
||||||
if include_text is None:
|
if include_text is None:
|
||||||
@@ -202,7 +197,7 @@ class LocalAIClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = resp.json()
|
raw_result = resp.json()
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.error("Failed to parse LocalAI response as JSON.")
|
logger.error("Failed to parse LocalAI response as JSON.")
|
||||||
raise LocalAIError(
|
raise LocalAIError(
|
||||||
@@ -212,11 +207,16 @@ class LocalAIClient:
|
|||||||
if verbose:
|
if verbose:
|
||||||
print("Diarization and transcription finished. Starting post-processing.")
|
print("Diarization and transcription finished. Starting post-processing.")
|
||||||
|
|
||||||
return self._parse_diarization_response(result)
|
parsed = self._parse_diarization_response(raw_result)
|
||||||
|
|
||||||
|
if return_raw:
|
||||||
|
parsed["raw_result"] = raw_result
|
||||||
|
|
||||||
|
return parsed
|
||||||
|
|
||||||
def _parse_diarization_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
def _parse_diarization_response(self, result: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Convert LocalAI response into the internal format used by Scraibe:
|
Convert LocalAI verbose_json response into the internal format used by Scraibe:
|
||||||
{
|
{
|
||||||
"segments": [ [start, end], ... ],
|
"segments": [ [start, end], ... ],
|
||||||
"speakers": [ "SPEAKER_00", ... ],
|
"speakers": [ "SPEAKER_00", ... ],
|
||||||
|
|||||||
+180
-21
@@ -7,13 +7,16 @@ Runs the Web GUI that:
|
|||||||
- Sends audio to LocalAI for transcription + diarization
|
- Sends audio to LocalAI for transcription + diarization
|
||||||
- Optionally sends transcript to a second LLM for summarization
|
- Optionally sends transcript to a second LLM for summarization
|
||||||
- Returns transcript (and summary) in the browser
|
- Returns transcript (and summary) in the browser
|
||||||
|
- Optionally emails transcript files (TXT + JSON)
|
||||||
|
|
||||||
This is the default entrypoint when running in Docker.
|
This is the default entrypoint when running in Docker.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
|
|
||||||
@@ -70,10 +73,23 @@ def create_app():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Helper: run transcription via LocalAI API
|
# Helper: run transcription via LocalAI API
|
||||||
def run_transcribe(audio_path, task, language, num_speakers):
|
def run_transcribe(
|
||||||
|
audio_path,
|
||||||
|
task,
|
||||||
|
language,
|
||||||
|
num_speakers,
|
||||||
|
send_email_flag,
|
||||||
|
email_to,
|
||||||
|
email_cc,
|
||||||
|
email_subject,
|
||||||
|
):
|
||||||
if not audio_path:
|
if not audio_path:
|
||||||
raise ValueError("No audio file provided.")
|
raise ValueError("No audio file provided.")
|
||||||
|
|
||||||
|
email_status = ""
|
||||||
|
attachments = []
|
||||||
|
|
||||||
|
# Ensure we use rich export mode (for JSON with diarization)
|
||||||
try:
|
try:
|
||||||
if task == "transcript_and_summarize":
|
if task == "transcript_and_summarize":
|
||||||
result = scraibe.transcript_and_summarize(
|
result = scraibe.transcript_and_summarize(
|
||||||
@@ -81,11 +97,14 @@ def create_app():
|
|||||||
language=language or None,
|
language=language or None,
|
||||||
num_speakers=int(num_speakers) if num_speakers else None,
|
num_speakers=int(num_speakers) if num_speakers else None,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
|
for_export=True,
|
||||||
)
|
)
|
||||||
transcript_text = result.get("transcript", "")
|
transcript_text = result.get("transcript", "")
|
||||||
summary_text = result.get("summary", "")
|
summary_text = result.get("summary", "")
|
||||||
|
segments = result.get("segments", [])
|
||||||
|
raw_result = result.get("raw_result")
|
||||||
|
|
||||||
# Save as .md
|
# Save as .md (transcript + summary)
|
||||||
md_path = tempfile.mktemp(suffix=".md")
|
md_path = tempfile.mktemp(suffix=".md")
|
||||||
with open(md_path, "w", encoding="utf-8") as f:
|
with open(md_path, "w", encoding="utf-8") as f:
|
||||||
f.write("# Transcript\n\n")
|
f.write("# Transcript\n\n")
|
||||||
@@ -93,32 +112,74 @@ def create_app():
|
|||||||
f.write("\n\n# Summary\n\n")
|
f.write("\n\n# Summary\n\n")
|
||||||
f.write(summary_text)
|
f.write(summary_text)
|
||||||
|
|
||||||
return (
|
# Save as .txt (plain transcript)
|
||||||
transcript_text,
|
txt_path = tempfile.mktemp(suffix=".txt")
|
||||||
summary_text,
|
with open(txt_path, "w", encoding="utf-8") as f:
|
||||||
md_path,
|
f.write(transcript_text)
|
||||||
"Transcription and summarization completed.",
|
|
||||||
)
|
# Save as .json (diarization + transcript + summary)
|
||||||
|
json_data = {
|
||||||
|
"task": "transcript_and_summarize",
|
||||||
|
"transcript": transcript_text,
|
||||||
|
"summary": summary_text,
|
||||||
|
"segments": segments,
|
||||||
|
"metadata": {
|
||||||
|
"timestamp": datetime.utcnow().isoformat()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if raw_result is not None:
|
||||||
|
json_data["raw_result"] = raw_result
|
||||||
|
|
||||||
|
json_path = tempfile.mktemp(suffix=".json")
|
||||||
|
with open(json_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Prepare attachments for email
|
||||||
|
if send_email_flag:
|
||||||
|
attachments = [txt_path, json_path]
|
||||||
|
|
||||||
|
status_msg = "Transcription and summarization completed."
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Default: transcribe only
|
# transcribe only (with diarization)
|
||||||
text = scraibe.transcribe(
|
result = scraibe.transcribe(
|
||||||
audio_file=audio_path,
|
audio_file=audio_path,
|
||||||
language=language or None,
|
language=language or None,
|
||||||
num_speakers=int(num_speakers) if num_speakers else None,
|
num_speakers=int(num_speakers) if num_speakers else None,
|
||||||
verbose=True,
|
verbose=True,
|
||||||
|
for_export=True,
|
||||||
)
|
)
|
||||||
|
transcript_text = result.get("transcript", "")
|
||||||
|
segments = result.get("segments", [])
|
||||||
|
raw_result = result.get("raw_result")
|
||||||
|
|
||||||
# Save as .txt
|
# Save as .txt (plain transcript)
|
||||||
txt_path = tempfile.mktemp(suffix=".txt")
|
txt_path = tempfile.mktemp(suffix=".txt")
|
||||||
with open(txt_path, "w", encoding="utf-8") as f:
|
with open(txt_path, "w", encoding="utf-8") as f:
|
||||||
f.write(text)
|
f.write(transcript_text)
|
||||||
|
|
||||||
|
# Save as .json (diarization + transcript)
|
||||||
|
json_data = {
|
||||||
|
"task": "transcribe",
|
||||||
|
"transcript": transcript_text,
|
||||||
|
"segments": segments,
|
||||||
|
"metadata": {
|
||||||
|
"timestamp": datetime.utcnow().isoformat()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if raw_result is not None:
|
||||||
|
json_data["raw_result"] = raw_result
|
||||||
|
|
||||||
|
json_path = tempfile.mktemp(suffix=".json")
|
||||||
|
with open(json_path, "w", encoding="utf-8") as f:
|
||||||
|
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Prepare attachments for email
|
||||||
|
if send_email_flag:
|
||||||
|
attachments = [txt_path, json_path]
|
||||||
|
|
||||||
|
status_msg = "Transcription completed."
|
||||||
|
|
||||||
return (
|
|
||||||
text,
|
|
||||||
"",
|
|
||||||
txt_path,
|
|
||||||
"Transcription completed.",
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error during transcription: %s", e)
|
logger.error("Error during transcription: %s", e)
|
||||||
return (
|
return (
|
||||||
@@ -126,6 +187,54 @@ def create_app():
|
|||||||
"",
|
"",
|
||||||
None,
|
None,
|
||||||
f"Error: {e}",
|
f"Error: {e}",
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle email after successful transcription
|
||||||
|
if send_email_flag and attachments:
|
||||||
|
try:
|
||||||
|
from .email_sender import send_email, EmailError
|
||||||
|
except ImportError:
|
||||||
|
email_status = "Email feature unavailable (email_sender not found)."
|
||||||
|
else:
|
||||||
|
to = (email_to or "").strip()
|
||||||
|
cc = (email_cc or "").strip()
|
||||||
|
subject = (email_subject or "").strip()
|
||||||
|
|
||||||
|
if not to:
|
||||||
|
email_status = "Email not sent: 'To' address is empty."
|
||||||
|
else:
|
||||||
|
if not subject:
|
||||||
|
subject = f"ScrAIbe Transcript - {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}"
|
||||||
|
|
||||||
|
body = (
|
||||||
|
"Please find the transcription files attached.\n"
|
||||||
|
"This message was generated by ScrAIbe.\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
send_email(
|
||||||
|
to=to,
|
||||||
|
cc=cc or None,
|
||||||
|
subject=subject,
|
||||||
|
body=body,
|
||||||
|
attachments=attachments,
|
||||||
|
)
|
||||||
|
email_status = "Transcript files sent via email."
|
||||||
|
except EmailError as e:
|
||||||
|
email_status = f"Email failed: {e}"
|
||||||
|
except Exception as e:
|
||||||
|
email_status = f"Email failed: {e}"
|
||||||
|
|
||||||
|
# Use md_path for file_output in transcript_and_summarize, else txt_path
|
||||||
|
file_path = md_path if task == "transcript_and_summarize" else txt_path
|
||||||
|
|
||||||
|
return (
|
||||||
|
transcript_text,
|
||||||
|
summary_text if task == "transcript_and_summarize" else "",
|
||||||
|
file_path,
|
||||||
|
status_msg,
|
||||||
|
email_status,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load header/footer HTML if present
|
# Load header/footer HTML if present
|
||||||
@@ -180,6 +289,31 @@ def create_app():
|
|||||||
precision=0,
|
precision=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Email options
|
||||||
|
send_email_checkbox = gr.Checkbox(
|
||||||
|
label="Send transcript files via email"
|
||||||
|
)
|
||||||
|
|
||||||
|
with gr.Group(visible=False) as email_group:
|
||||||
|
email_to = gr.Textbox(
|
||||||
|
label="To (comma-separated)",
|
||||||
|
placeholder="e.g. name@example.com",
|
||||||
|
)
|
||||||
|
email_cc = gr.Textbox(
|
||||||
|
label="CC (optional, comma-separated)",
|
||||||
|
placeholder="e.g. manager@example.com",
|
||||||
|
)
|
||||||
|
email_subject = gr.Textbox(
|
||||||
|
label="Subject (optional)",
|
||||||
|
placeholder="Default: ScrAIbe Transcript - <date>",
|
||||||
|
)
|
||||||
|
|
||||||
|
send_email_checkbox.change(
|
||||||
|
fn=lambda v: gr.update(visible=v),
|
||||||
|
inputs=[send_email_checkbox],
|
||||||
|
outputs=[email_group],
|
||||||
|
)
|
||||||
|
|
||||||
transcribe_btn = gr.Button("Start", variant="primary")
|
transcribe_btn = gr.Button("Start", variant="primary")
|
||||||
|
|
||||||
with gr.Column(scale=3):
|
with gr.Column(scale=3):
|
||||||
@@ -201,6 +335,11 @@ def create_app():
|
|||||||
label="Status",
|
label="Status",
|
||||||
interactive=False,
|
interactive=False,
|
||||||
)
|
)
|
||||||
|
email_status_text = gr.Textbox(
|
||||||
|
label="Email status",
|
||||||
|
interactive=False,
|
||||||
|
visible=True,
|
||||||
|
)
|
||||||
|
|
||||||
# Footer
|
# Footer
|
||||||
if footer_html:
|
if footer_html:
|
||||||
@@ -218,20 +357,34 @@ def create_app():
|
|||||||
outputs=[summary_text],
|
outputs=[summary_text],
|
||||||
)
|
)
|
||||||
|
|
||||||
def on_transcribe(audio, task, language, num_speakers):
|
def on_transcribe(
|
||||||
|
audio,
|
||||||
|
task,
|
||||||
|
language,
|
||||||
|
num_speakers,
|
||||||
|
send_email_flag,
|
||||||
|
email_to_val,
|
||||||
|
email_cc_val,
|
||||||
|
email_subject_val,
|
||||||
|
):
|
||||||
if not audio:
|
if not audio:
|
||||||
return (
|
return (
|
||||||
"",
|
"",
|
||||||
"",
|
"",
|
||||||
None,
|
None,
|
||||||
"Please upload or record audio.",
|
"Please upload or record audio.",
|
||||||
|
"",
|
||||||
)
|
)
|
||||||
|
|
||||||
transcript, summary, file_path, msg = run_transcribe(
|
transcript, summary, file_path, status_msg, email_status = run_transcribe(
|
||||||
audio_path=audio,
|
audio_path=audio,
|
||||||
task=task,
|
task=task,
|
||||||
language=language,
|
language=language,
|
||||||
num_speakers=num_speakers,
|
num_speakers=num_speakers,
|
||||||
|
send_email_flag=bool(send_email_flag),
|
||||||
|
email_to=email_to_val,
|
||||||
|
email_cc=email_cc_val,
|
||||||
|
email_subject=email_subject_val,
|
||||||
)
|
)
|
||||||
|
|
||||||
show_summary = bool(summary)
|
show_summary = bool(summary)
|
||||||
@@ -239,7 +392,8 @@ def create_app():
|
|||||||
transcript,
|
transcript,
|
||||||
summary,
|
summary,
|
||||||
file_path if file_path else None,
|
file_path if file_path else None,
|
||||||
msg,
|
status_msg,
|
||||||
|
email_status,
|
||||||
)
|
)
|
||||||
|
|
||||||
transcribe_btn.click(
|
transcribe_btn.click(
|
||||||
@@ -249,12 +403,17 @@ def create_app():
|
|||||||
task_choice,
|
task_choice,
|
||||||
language_input,
|
language_input,
|
||||||
num_speakers_input,
|
num_speakers_input,
|
||||||
|
send_email_checkbox,
|
||||||
|
email_to,
|
||||||
|
email_cc,
|
||||||
|
email_subject,
|
||||||
],
|
],
|
||||||
outputs=[
|
outputs=[
|
||||||
output_text,
|
output_text,
|
||||||
summary_text,
|
summary_text,
|
||||||
file_output,
|
file_output,
|
||||||
status_text,
|
status_text,
|
||||||
|
email_status_text,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user