From 49e999f0ee8a882fea3119b3b857659d608a241d Mon Sep 17 00:00:00 2001
From: ScrAIbe Admin <admin@apstrom.ca>
Date: Sun, 14 Jun 2026 18:05:37 +0000
Subject: [PATCH] Add Identify speakers option: AI infers names and replaces
 Speaker IDs in transcript

---
 scraibe/tasks.py | 86 ++++++++++++++++++++++++++++++++++++++++++++++++
 scraibe/webui.py |  9 +++++
 2 files changed, 95 insertions(+)

diff --git a/scraibe/tasks.py b/scraibe/tasks.py
index 26ffac9..74f099b 100644
--- a/scraibe/tasks.py
+++ b/scraibe/tasks.py
@@ -10,6 +10,7 @@ from datetime import datetime
 
 from .celery_app import celery_app
 from .autotranscript import Scraibe
+from .summarizer import SummarizerClient, SummarizerError
 from .misc import setup_logging
 from .email_sender import send_email, EmailError, load_template
 from .email_sender import create_transcript_docx, create_summary_docx
@@ -238,6 +239,7 @@ def process_transcription_task(
     email_to: str,
     email_cc: str,
     include_summary: bool,
+    identify_speakers: bool = False,
 ):
     """
     Async task: transcribe audio, optionally summarize, then email results.
@@ -294,6 +296,90 @@ def process_transcription_task(
             segments = result.get("segments", [])
             raw_result = result.get("raw_result")
 
+        # 3b) Optional speaker identification (best effort): ask the summarizer
+        # LLM for a "SPEAKER N: Name" mapping, then apply it to text and segments.
+        speaker_map = {}  # e.g. {"SPEAKER 1": "John", "SPEAKER 2": "Maria"}
+        if identify_speakers:
+            try:
+                import re
+
+                # Use the same summarizer client as transcript_and_summarize
+                scraibe._ensure_summarizer()
+                summarizer = scraibe._summarizer
+
+                # NOTE(review): the prompt asks the model to *suggest* names, so it
+                # may invent names nobody said in the audio; confirm this is intended.
+                prompt = (
+                    "Below is a transcript with speaker labels like 'SPEAKER 1', 'SPEAKER 2', etc. "
+                    "Based on how they speak and the context, suggest realistic names for each speaker. "
+                    "Do not add extra commentary. Output ONLY a mapping in this exact format, one per line:\n"
+                    "SPEAKER 1: Suggested Name\n"
+                    "SPEAKER 2: Suggested Name\n"
+                    "SPEAKER 3: Suggested Name\n"
+                    "\n"
+                    "Transcript:\n" + transcript_text
+                )
+
+                response = summarizer._chat_completion(
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0.3,
+                    max_tokens=300,
+                )
+                choice = (response or {}).get("choices", [{}])[0]
+                # "or ''" also covers a reply whose content is present but null.
+                reply = choice.get("message", {}).get("content", "") or ""
+
+                # Parse the reply into {"SPEAKER N": name}; lines that do not follow
+                # the requested format are ignored.
+                for m in re.finditer(r"SPEAKER\s+(\d+)\s*:\s*(.+)", reply, re.IGNORECASE):
+                    name = m.group(2).strip().rstrip(".").strip()
+                    if name:
+                        speaker_map[f"SPEAKER {m.group(1)}"] = name
+
+                logger.info("Speaker identification mapping: %s", speaker_map)
+
+                if speaker_map:
+                    def canonical(label: str) -> str:
+                        """Normalize a label to the 'SPEAKER N' spelling used for map keys."""
+                        cleaned = re.sub(r"[^A-Z0-9\s]", "", label.upper())
+                        return " ".join(cleaned.split())
+
+                    # Digits must be allowed in the label, otherwise "SPEAKER 1" never
+                    # matches and no line is renamed. Timestamp and colon are preserved.
+                    label_re = re.compile(r"(\[\d+:\d+(?::\d+)?\]\s*)([A-Z0-9 \t]+?)\s*:\s*")
+
+                    def replace_in_line(line: str) -> str:
+                        """Swap the speaker label in one '[mm:ss] SPEAKER N:' line."""
+                        return label_re.sub(
+                            lambda m: m.group(1)
+                            + speaker_map.get(canonical(m.group(2)), m.group(2))
+                            + ": ",
+                            line,
+                        )
+
+                    # Build both results first and commit them together, so a failure
+                    # part-way through leaves transcript and segments both untouched.
+                    new_text = "\n".join(
+                        replace_in_line(line) for line in transcript_text.splitlines()
+                    )
+
+                    # Also update segments for JSON export (on copies, not in place)
+                    new_segments = []
+                    for seg in segments:
+                        sp = (seg.get("speaker") or "").strip()
+                        seg = dict(seg)
+                        seg["speaker"] = speaker_map.get(canonical(sp), sp)
+                        new_segments.append(seg)
+
+                    transcript_text, segments = new_text, new_segments
+
+            except (SummarizerError, Exception) as e:
+                # Best effort: on any failure keep the original speaker labels.
+                logger.warning(
+                    "Speaker identification failed; falling back to Speaker IDs: %s", e
+                )
+                speaker_map = {}
+
         # 4) Prepare files
 
         # Transcript .md
diff --git a/scraibe/webui.py b/scraibe/webui.py
index 35fe90e..b9dad14 100644
--- a/scraibe/webui.py
+++ b/scraibe/webui.py
@@ -135,6 +135,12 @@ def create_app():
                         label="Task",
                     )
 
+                identify_speakers = gr.Checkbox(
+                    label="Identify speakers (best effort using AI)",
+                    value=False,
+                    info="If enabled, AI will attempt to infer real names for speakers and replace Speaker 1/2/etc. in the transcript."
+                )
+
                 with gr.Row():
                     language_input = gr.Textbox(
                         label="Language (optional)",
@@ -188,6 +194,7 @@ def create_app():
             num_speakers,
             email_to_val,
             email_cc_val,
+            identify_speakers_val,
         ):
             if not audio:
                 return "Please upload or record audio."
@@ -225,6 +232,7 @@ def create_app():
                     email_to=email_to_val,
                     email_cc=email_cc_val or None,
                     include_summary=(task == "transcript_and_summarize"),
+                    identify_speakers=bool(identify_speakers_val),
                 )
             except Exception as e:
                 logger.error("Error enqueuing job: %s", e)
@@ -247,6 +255,7 @@ def create_app():
                 num_speakers_input,
                 email_to,
                 email_cc,
+                identify_speakers,
             ],
             outputs=[status_text],
         )