diff --git a/scraibe/autotranscript.py b/scraibe/autotranscript.py index 3b13ee8..2c99e66 100644 --- a/scraibe/autotranscript.py +++ b/scraibe/autotranscript.py @@ -17,7 +17,7 @@ but ignored when not relevant. import os import logging -from typing import Union, Optional +from typing import Union, Optional, Dict, Any from .localai_client import LocalAIClient, LocalAIError from .summarizer import SummarizerClient, SummarizerError @@ -120,21 +120,21 @@ class Scraibe: def transcribe( self, - audio_file: Union[str], + audio_file: str, + *, + for_export: bool = False, **kwargs, - ) -> str: + ) -> Union[str, Dict[str, Any]]: """ Transcribe the provided audio file using LocalAI. - Uses /v1/audio/diarization with vibevoice.cpp, then concatenates - all segment texts. - - Args: - audio_file (str): Path to the audio file. - **kwargs: Additional keyword arguments (some forwarded, others ignored). - + Uses /v1/audio/diarization with vibevoice.cpp (verbose_json). Returns: - str: The concatenated transcribed text. + - If for_export=False: plain transcript text (str). 
+ - If for_export=True: dict with: + - transcript: plain text + - segments: list[segment] with speaker labels + - raw_result: full verbose_json from LocalAI (if present) """ if isinstance(audio_file, str): if not os.path.exists(audio_file): @@ -152,31 +152,70 @@ class Scraibe: audio_path=audio_file, include_text=True, verbose=verbose, + return_raw=True, **kwargs, ) except LocalAIError as e: logger.error("Error during LocalAI transcription: %s", e) raise LocalAIError(f"Error during LocalAI transcription: {e}") + segments = result.get("segments", []) + speakers = result.get("speakers", []) transcripts = result.get("transcripts", []) - text = " ".join(t.strip() for t in transcripts if t.strip()) - logger.info("transcribe completed, length=%d chars", len(text)) - return text + + # Build simple transcript text + if for_export: + # Include speaker-labeled transcript + lines = [] + for seg, speaker, text in zip(segments, speakers, transcripts): + start, end = seg + ts = self._format_timestamp(start) + line = f"[{ts}] {speaker}: {text.strip()}" + lines.append(line) + full_text = "\n\n".join(lines) + else: + # Legacy: space-joined text + full_text = " ".join(t.strip() for t in transcripts if t.strip()) + + logger.info("transcribe completed, length=%d chars", len(full_text)) + + if for_export: + # Return richer structure for JSON export + raw_result = result.get("raw_result") + return { + "transcript": full_text, + "segments": [ + { + "id": i, + "speaker": sp, + "start": seg[0], + "end": seg[1], + "text": txt, + } + for i, (seg, sp, txt) in enumerate( + zip(segments, speakers, transcripts) + ) + ], + "raw_result": raw_result if raw_result is not None else None, + } + + return full_text def transcript_and_summarize( self, - audio_file: Union[str], + audio_file: str, *, summarizer_api_url: Optional[str] = None, summarizer_api_key: Optional[str] = None, summarizer_model: Optional[str] = None, + for_export: bool = False, **kwargs, ) -> dict: """ Transcribe the audio file and 
generate a detailed summary. Steps: - - Transcribe via LocalAI. + - Transcribe via LocalAI (verbose_json). - Build a plain-text transcript (with speaker labels). - Summarize the transcript using the configured LLM. @@ -184,6 +223,8 @@ class Scraibe: dict with: - transcript: full transcript text (with speaker labels) - summary: final detailed summary (markdown-ready) + - segments: (if for_export) list[segment] with speaker labels + - raw_result: (if for_export) full verbose_json from LocalAI """ if isinstance(audio_file, str): if not os.path.exists(audio_file): @@ -202,6 +243,7 @@ class Scraibe: audio_path=audio_file, include_text=True, verbose=verbose, + return_raw=True, **kwargs, ) except LocalAIError as e: @@ -249,11 +291,30 @@ class Scraibe: logger.info("transcript_and_summarize completed.") - return { + out = { "transcript": full_transcript, "summary": summary, } + if for_export: + # Add segments and raw_result for JSON export + raw_result = result.get("raw_result") + out["segments"] = [ + { + "id": i, + "speaker": sp, + "start": seg[0], + "end": seg[1], + "text": txt, + } + for i, (seg, sp, txt) in enumerate( + zip(segments, speakers, transcripts) + ) + ] + out["raw_result"] = raw_result if raw_result is not None else None + + return out + # ----------------- # Helpers # ----------------- diff --git a/scraibe/email_sender.py b/scraibe/email_sender.py new file mode 100644 index 0000000..ce5ed56 --- /dev/null +++ b/scraibe/email_sender.py @@ -0,0 +1,147 @@ +""" +Email sender module for ScrAIbe. + +Sends transcription outputs (TXT, JSON, etc.) via SMTP. +All credentials are configured via environment variables. 
import os
import smtplib
import logging
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import Any, Dict, List, Optional

logger = logging.getLogger("scraibe.email_sender")


class EmailError(Exception):
    """Raised when email configuration is invalid or a message cannot be sent."""


def get_email_config() -> Dict[str, Any]:
    """
    Read email configuration from environment variables.

    Reads EMAIL_SMTP_HOST, EMAIL_SMTP_PORT, EMAIL_SMTP_USER,
    EMAIL_SMTP_PASSWORD, EMAIL_FROM_ADDRESS and the optional
    EMAIL_SMTP_USE_TLS (defaults to "true"; only "false", "0" and "no",
    case-insensitively, disable TLS).

    Returns:
        dict with keys smtp_host, smtp_port (int), smtp_user,
        smtp_password, from_address and use_tls (bool).

    Raises:
        EmailError: if a required variable is missing/empty, or if
            EMAIL_SMTP_PORT is not an integer.
    """
    smtp_host = os.getenv("EMAIL_SMTP_HOST")
    smtp_port = os.getenv("EMAIL_SMTP_PORT")
    smtp_user = os.getenv("EMAIL_SMTP_USER")
    smtp_password = os.getenv("EMAIL_SMTP_PASSWORD")
    from_address = os.getenv("EMAIL_FROM_ADDRESS")
    use_tls_str = os.getenv("EMAIL_SMTP_USE_TLS", "true").strip().lower()
    use_tls = use_tls_str not in ("false", "0", "no")

    # Empty strings count as "not set", same as a missing variable.
    if not all([smtp_host, smtp_port, smtp_user, smtp_password, from_address]):
        raise EmailError(
            "Email configuration incomplete. "
            "Ensure EMAIL_SMTP_HOST, EMAIL_SMTP_PORT, EMAIL_SMTP_USER, "
            "EMAIL_SMTP_PASSWORD, and EMAIL_FROM_ADDRESS are set."
        )

    # Surface a malformed port as a configuration error instead of letting a
    # bare ValueError escape past callers that only handle EmailError.
    try:
        port = int(smtp_port)
    except ValueError as e:
        raise EmailError(
            f"EMAIL_SMTP_PORT must be an integer, got {smtp_port!r}."
        ) from e

    return {
        "smtp_host": smtp_host,
        "smtp_port": port,
        "smtp_user": smtp_user,
        "smtp_password": smtp_password,
        "from_address": from_address,
        "use_tls": use_tls,
    }


def _split_addresses(raw: Optional[str]) -> List[str]:
    """Split a comma-separated address string into stripped, non-empty items."""
    if not raw:
        return []
    return [addr.strip() for addr in raw.split(",") if addr.strip()]


def _attach_file(msg: MIMEMultipart, file_path: str) -> None:
    """Attach one file to msg as base64 octet-stream; log and skip on failure."""
    if not os.path.isfile(file_path):
        logger.warning("Attachment file not found, skipping: %s", file_path)
        return

    try:
        with open(file_path, "rb") as f:
            payload = f.read()
    except OSError as e:
        # Best effort: an unreadable attachment must not block the email.
        logger.warning("Failed to attach file %s: %s", file_path, e)
        return

    part = MIMEBase("application", "octet-stream")
    part.set_payload(payload)
    encoders.encode_base64(part)
    part.add_header(
        "Content-Disposition",
        "attachment",
        filename=os.path.basename(file_path),
    )
    msg.attach(part)


def _deliver(cfg: Dict[str, Any], recipients: List[str], msg: MIMEMultipart) -> None:
    """Open an SMTP session, send msg to recipients, and always release the socket."""
    server = smtplib.SMTP(cfg["smtp_host"], cfg["smtp_port"], timeout=30)
    try:
        server.ehlo()
        if cfg["use_tls"]:
            server.starttls()
            # Re-identify after the TLS upgrade, as the session state is reset.
            server.ehlo()
        server.login(cfg["smtp_user"], cfg["smtp_password"])
        server.sendmail(cfg["from_address"], recipients, msg.as_string())
    finally:
        # Close the connection on every path (success or failure). A failing
        # QUIT must neither mask the real error nor turn a delivered message
        # into a reported failure, so fall back to a plain close.
        try:
            server.quit()
        except (smtplib.SMTPException, OSError):
            server.close()


def send_email(
    to: str,
    subject: str,
    body: str,
    attachments: Optional[List[str]] = None,
    cc: Optional[str] = None,
) -> bool:
    """
    Send an email with optional file attachments.

    Args:
        to: Comma-separated list of recipient email addresses.
        subject: Email subject.
        body: Email body (plain text).
        attachments: List of file paths to attach (optional). Paths that do
            not exist or cannot be read are logged and skipped.
        cc: Comma-separated list of CC email addresses (optional).

    Returns:
        True if sent successfully.

    Raises:
        EmailError: if the configuration is incomplete, no valid 'To'
            address is given, or sending fails. When sending fails the
            underlying exception is available as ``__cause__``.
    """
    try:
        cfg = get_email_config()
    except EmailError as e:
        logger.error("Email configuration error: %s", e)
        raise

    # Parse recipients
    to_list = _split_addresses(to)
    cc_list = _split_addresses(cc)

    if not to_list:
        raise EmailError("No valid 'To' email addresses provided.")

    # Build message
    msg = MIMEMultipart()
    msg["From"] = cfg["from_address"]
    msg["To"] = ", ".join(to_list)
    if cc_list:
        msg["Cc"] = ", ".join(cc_list)
    msg["Subject"] = subject

    msg.attach(MIMEText(body, "plain"))

    # Attach files
    for file_path in attachments or []:
        _attach_file(msg, file_path)

    # Connect and send. The broad catch is deliberate: this is the boundary
    # where any transport/serialization failure becomes the documented
    # EmailError, with the original exception chained for debugging.
    try:
        _deliver(cfg, to_list + cc_list, msg)
    except Exception as e:
        logger.error("Failed to send email: %s", e)
        raise EmailError(f"Failed to send email: {e}") from e

    logger.info(
        "Email sent to %s (CC: %s)",
        to_list,
        cc_list or "None",
    )
    return True
Responsibilities: - Read configuration from environment. - Upload audio file as multipart/form-data. - - Parse diarization + transcription response. + - Parse diarization + transcription response (verbose_json). - Map response into the same structure expected by Scraibe's Transcript. """ @@ -106,20 +106,13 @@ class LocalAIClient: response_format: Optional[str] = None, include_text: Optional[bool] = None, verbose: bool = False, + return_raw: bool = False, **_ignored, ) -> Dict[str, Any]: """ - Send audio to LocalAI /v1/audio/diarization and return a dict - in the same style as the previous internal diarization output: - - { - "segments": [ [start, end], ... ], - "speakers": [ "SPEAKER_00", ... ], - "transcripts": [ "text for segment", ... ] - } - - Extra kwargs that the old UI used (e.g., whisper-specific) are - accepted but ignored. + Send audio to LocalAI /v1/audio/diarization and return: + - A normalized dict with segments, speakers, transcripts. + - Optionally, the raw verbose_json response (for JSON export). Args: audio_path: Path to the audio file. @@ -131,16 +124,18 @@ class LocalAIClient: min_duration_on: Optional min segment duration. min_duration_off: Optional min gap duration. response_format: "json", "verbose_json", or "rttm". - Defaults to "verbose_json" if not set. + Defaults to "verbose_json". include_text: Whether to request per-segment text. Defaults to True. verbose: If True, prints progress messages. + return_raw: If True, also return the raw API response in 'raw_result'. 
""" if verbose: print("Starting diarization and transcription via LocalAI.") logger.info("diarize_and_transcribe requested for: %s", audio_path) + # Always use verbose_json for diarization + speaker info if response_format is None: response_format = "verbose_json" if include_text is None: @@ -202,7 +197,7 @@ class LocalAIClient: ) try: - result = resp.json() + raw_result = resp.json() except json.JSONDecodeError: logger.error("Failed to parse LocalAI response as JSON.") raise LocalAIError( @@ -212,11 +207,16 @@ class LocalAIClient: if verbose: print("Diarization and transcription finished. Starting post-processing.") - return self._parse_diarization_response(result) + parsed = self._parse_diarization_response(raw_result) + + if return_raw: + parsed["raw_result"] = raw_result + + return parsed def _parse_diarization_response(self, result: Dict[str, Any]) -> Dict[str, Any]: """ - Convert LocalAI response into the internal format used by Scraibe: + Convert LocalAI verbose_json response into the internal format used by Scraibe: { "segments": [ [start, end], ... ], "speakers": [ "SPEAKER_00", ... ], diff --git a/scraibe/webui.py b/scraibe/webui.py index 1b5d7ee..8c656a3 100644 --- a/scraibe/webui.py +++ b/scraibe/webui.py @@ -7,13 +7,16 @@ Runs the Web GUI that: - Sends audio to LocalAI for transcription + diarization - Optionally sends transcript to a second LLM for summarization - Returns transcript (and summary) in the browser +- Optionally emails transcript files (TXT + JSON) This is the default entrypoint when running in Docker. 
""" import os +import json import logging import tempfile +from datetime import datetime import gradio as gr @@ -70,10 +73,23 @@ def create_app(): ) # Helper: run transcription via LocalAI API - def run_transcribe(audio_path, task, language, num_speakers): + def run_transcribe( + audio_path, + task, + language, + num_speakers, + send_email_flag, + email_to, + email_cc, + email_subject, + ): if not audio_path: raise ValueError("No audio file provided.") + email_status = "" + attachments = [] + + # Ensure we use rich export mode (for JSON with diarization) try: if task == "transcript_and_summarize": result = scraibe.transcript_and_summarize( @@ -81,11 +97,14 @@ def create_app(): language=language or None, num_speakers=int(num_speakers) if num_speakers else None, verbose=True, + for_export=True, ) transcript_text = result.get("transcript", "") summary_text = result.get("summary", "") + segments = result.get("segments", []) + raw_result = result.get("raw_result") - # Save as .md + # Save as .md (transcript + summary) md_path = tempfile.mktemp(suffix=".md") with open(md_path, "w", encoding="utf-8") as f: f.write("# Transcript\n\n") @@ -93,32 +112,74 @@ def create_app(): f.write("\n\n# Summary\n\n") f.write(summary_text) - return ( - transcript_text, - summary_text, - md_path, - "Transcription and summarization completed.", - ) + # Save as .txt (plain transcript) + txt_path = tempfile.mktemp(suffix=".txt") + with open(txt_path, "w", encoding="utf-8") as f: + f.write(transcript_text) + + # Save as .json (diarization + transcript + summary) + json_data = { + "task": "transcript_and_summarize", + "transcript": transcript_text, + "summary": summary_text, + "segments": segments, + "metadata": { + "timestamp": datetime.utcnow().isoformat() + }, + } + if raw_result is not None: + json_data["raw_result"] = raw_result + + json_path = tempfile.mktemp(suffix=".json") + with open(json_path, "w", encoding="utf-8") as f: + json.dump(json_data, f, indent=2, ensure_ascii=False) + + # 
Prepare attachments for email + if send_email_flag: + attachments = [txt_path, json_path] + + status_msg = "Transcription and summarization completed." + else: - # Default: transcribe only - text = scraibe.transcribe( + # transcribe only (with diarization) + result = scraibe.transcribe( audio_file=audio_path, language=language or None, num_speakers=int(num_speakers) if num_speakers else None, verbose=True, + for_export=True, ) + transcript_text = result.get("transcript", "") + segments = result.get("segments", []) + raw_result = result.get("raw_result") - # Save as .txt + # Save as .txt (plain transcript) txt_path = tempfile.mktemp(suffix=".txt") with open(txt_path, "w", encoding="utf-8") as f: - f.write(text) + f.write(transcript_text) + + # Save as .json (diarization + transcript) + json_data = { + "task": "transcribe", + "transcript": transcript_text, + "segments": segments, + "metadata": { + "timestamp": datetime.utcnow().isoformat() + }, + } + if raw_result is not None: + json_data["raw_result"] = raw_result + + json_path = tempfile.mktemp(suffix=".json") + with open(json_path, "w", encoding="utf-8") as f: + json.dump(json_data, f, indent=2, ensure_ascii=False) + + # Prepare attachments for email + if send_email_flag: + attachments = [txt_path, json_path] + + status_msg = "Transcription completed." - return ( - text, - "", - txt_path, - "Transcription completed.", - ) except Exception as e: logger.error("Error during transcription: %s", e) return ( @@ -126,8 +187,56 @@ def create_app(): "", None, f"Error: {e}", + "", ) + # Handle email after successful transcription + if send_email_flag and attachments: + try: + from .email_sender import send_email, EmailError + except ImportError: + email_status = "Email feature unavailable (email_sender not found)." + else: + to = (email_to or "").strip() + cc = (email_cc or "").strip() + subject = (email_subject or "").strip() + + if not to: + email_status = "Email not sent: 'To' address is empty." 
+ else: + if not subject: + subject = f"ScrAIbe Transcript - {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}" + + body = ( + "Please find the transcription files attached.\n" + "This message was generated by ScrAIbe.\n" + ) + + try: + send_email( + to=to, + cc=cc or None, + subject=subject, + body=body, + attachments=attachments, + ) + email_status = "Transcript files sent via email." + except EmailError as e: + email_status = f"Email failed: {e}" + except Exception as e: + email_status = f"Email failed: {e}" + + # Use md_path for file_output in transcript_and_summarize, else txt_path + file_path = md_path if task == "transcript_and_summarize" else txt_path + + return ( + transcript_text, + summary_text if task == "transcript_and_summarize" else "", + file_path, + status_msg, + email_status, + ) + # Load header/footer HTML if present header_path = layout_cfg.get("header", "/app/src/misc/header.html") footer_path = layout_cfg.get("footer", "/app/src/misc/footer.html") @@ -180,6 +289,31 @@ def create_app(): precision=0, ) + # Email options + send_email_checkbox = gr.Checkbox( + label="Send transcript files via email" + ) + + with gr.Group(visible=False) as email_group: + email_to = gr.Textbox( + label="To (comma-separated)", + placeholder="e.g. name@example.com", + ) + email_cc = gr.Textbox( + label="CC (optional, comma-separated)", + placeholder="e.g. 
manager@example.com", + ) + email_subject = gr.Textbox( + label="Subject (optional)", + placeholder="Default: ScrAIbe Transcript - ", + ) + + send_email_checkbox.change( + fn=lambda v: gr.update(visible=v), + inputs=[send_email_checkbox], + outputs=[email_group], + ) + transcribe_btn = gr.Button("Start", variant="primary") with gr.Column(scale=3): @@ -201,6 +335,11 @@ def create_app(): label="Status", interactive=False, ) + email_status_text = gr.Textbox( + label="Email status", + interactive=False, + visible=True, + ) # Footer if footer_html: @@ -218,20 +357,34 @@ def create_app(): outputs=[summary_text], ) - def on_transcribe(audio, task, language, num_speakers): + def on_transcribe( + audio, + task, + language, + num_speakers, + send_email_flag, + email_to_val, + email_cc_val, + email_subject_val, + ): if not audio: return ( "", "", None, "Please upload or record audio.", + "", ) - transcript, summary, file_path, msg = run_transcribe( + transcript, summary, file_path, status_msg, email_status = run_transcribe( audio_path=audio, task=task, language=language, num_speakers=num_speakers, + send_email_flag=bool(send_email_flag), + email_to=email_to_val, + email_cc=email_cc_val, + email_subject=email_subject_val, ) show_summary = bool(summary) @@ -239,7 +392,8 @@ def create_app(): transcript, summary, file_path if file_path else None, - msg, + status_msg, + email_status, ) transcribe_btn.click( @@ -249,12 +403,17 @@ def create_app(): task_choice, language_input, num_speakers_input, + send_email_checkbox, + email_to, + email_cc, + email_subject, ], outputs=[ output_text, summary_text, file_output, status_text, + email_status_text, ], )