diff --git a/scraibe/email_sender.py b/scraibe/email_sender.py index cbb7a2b..31006b5 100644 --- a/scraibe/email_sender.py +++ b/scraibe/email_sender.py @@ -284,25 +284,13 @@ def send_email( raise EmailError(f"Failed to send email: {e}") -def create_transcript_docx(text: str, filename: str): - """ - Create a .docx transcript with: - - 1.5" left margin, 1" right margin - - 12pt Courier - - Continuous line numbering on the left - - Speaker names capitalized and indented; spoken text further indented - - No section headings; use bold/underline only. - """ - doc = Document() - - # Set margins via section properties +def _setup_docx_style(doc): section = doc.sections[0] section.left_margin = Inches(1.5) section.right_margin = Inches(1.0) section.top_margin = Inches(1.0) section.bottom_margin = Inches(1.0) - # Enable continuous line numbering on the left sectPr = section._sectPr lnNumType = sectPr.find(qn("w:lnNumType")) if lnNumType is None: @@ -311,85 +299,82 @@ def create_transcript_docx(text: str, filename: str): lnNumType.set(qn("w:start"), "continuous") lnNumType.set(qn("w:countBy"), "1") - # Default font style = doc.styles["Normal"] font = style.font font.name = "Courier" font.size = Pt(12) - # Parse lines + +def _add_cover_page(doc, doc_type, date, description): + p_type = doc.add_paragraph() + run_type = p_type.add_run(doc_type) + run_type.bold = True + run_type.font.name = "Courier" + run_type.font.size = Pt(12) + + p_date = doc.add_paragraph() + run_date = p_date.add_run(date) + run_date.font.name = "Courier" + run_date.font.size = Pt(12) + + for _ in range(3): + doc.add_paragraph() + + p_desc = doc.add_paragraph() + run_desc = p_desc.add_run(description) + run_desc.font.name = "Courier" + run_desc.font.size = Pt(12) + + doc.add_page_break() + + +def _add_transcript_content(doc, text): lines = text.strip().split("\n") for line in lines: line = line.strip() if not line: continue - # Try to parse: [00:00] SPEAKER: text m = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", line) if m: ts, speaker, content = m.groups() - # Single paragraph: [timestamp] SPEAKER NAME underlined, then text inline p = doc.add_paragraph() p.paragraph_format.left_indent = Inches(0.25) - # Timestamp + speaker name (underline only, not bold) run_label = p.add_run(f"[{ts}] {speaker.upper()}:") run_label.bold = False run_label.underline = True run_label.font.name = "Courier" run_label.font.size = Pt(12) - # Space after colon (no underline) run_space = p.add_run(" ") run_space.bold = False run_space.underline = False run_space.font.name = "Courier" run_space.font.size = Pt(12) - # Spoken text (no underline, no bold) run_txt = p.add_run(content.strip()) run_txt.bold = False run_txt.underline = False run_txt.font.name = "Courier" run_txt.font.size = Pt(12) else: - # Fallback for non-standard lines p = doc.add_paragraph() run = p.add_run(line) run.font.name = "Courier" run.font.size = Pt(12) - doc.save(filename) - - -def create_summary_docx(text: str, filename: str): - """ - Create a .docx summary with consistent font. - Translates markdown headings into WYSIWYG formatting: - - First heading: bold - - Second heading: italic - - Third heading: underline - - Fourth heading: italic + underline - No section headings; use bold/underline only. - """ - doc = Document() - style = doc.styles["Normal"] - font = style.font - font.name = "Courier" - font.size = Pt(12) - - heading_count = 0 # track headings in order +def _add_summary_content(doc, text): + heading_count = 0 for line in text.splitlines(): stripped = line.strip() if not stripped: continue - # Detect markdown-style headings: #, ##, ###, #### m = re.match(r"^(#{1,4})\s+(.*)", stripped) if m: heading_count += 1 - level = len(m.group(1)) # 1..4 content = m.group(2).strip() p = doc.add_paragraph() @@ -399,7 +384,6 @@ def create_summary_docx(text: str, filename: str): run.font.name = "Courier" run.font.size = Pt(12) - # Apply formatting based on this heading's ordinal position if heading_count == 1: run.bold = True elif heading_count == 2: @@ -410,8 +394,84 @@ def create_summary_docx(text: str, filename: str): run.italic = True run.underline = True else: - # Normal text line p = doc.add_paragraph(stripped) p.paragraph_format.space_after = Pt(4) + +def create_transcript_docx( + text: str, + filename: str, + include_cover: bool = False, + cover_date: str = "", + cover_desc: str = "", +): + """ + Create a .docx transcript with: + - 1.5" left margin, 1" right margin + - 12pt Courier + - Continuous line numbering on the left + - Optional cover page with type, date, and AI-generated description. + """ + doc = Document() + _setup_docx_style(doc) + + if include_cover and cover_date and cover_desc: + _add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc) + + _add_transcript_content(doc, text) + doc.save(filename) + + +def create_summary_docx( + text: str, + filename: str, + include_cover: bool = False, + cover_date: str = "", + cover_desc: str = "", +): + """ + Create a .docx summary with consistent font and heading styles. + Optional cover page with type, date, and AI-generated description. + """ + doc = Document() + _setup_docx_style(doc) + + if include_cover and cover_date and cover_desc: + _add_cover_page(doc, "SUMMARY", cover_date, cover_desc) + + _add_summary_content(doc, text) + doc.save(filename) + + +def create_combined_docx( + transcript_text: str, + summary_text: str, + filename: str, + transcript_cover_date: str, + transcript_cover_desc: str, + summary_cover_date: str, + summary_cover_desc: str, +): + """ + Create a combined .docx with: + - Transcript cover page + - Transcript content + - Page break + - Summary cover page + - Summary content + """ + doc = Document() + _setup_docx_style(doc) + + # Transcript cover page + _add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc) + _add_transcript_content(doc, transcript_text) + + # Page break before summary + doc.add_page_break() + + # Summary cover page + _add_cover_page(doc, "SUMMARY", summary_cover_date, summary_cover_desc) + _add_summary_content(doc, summary_text) + doc.save(filename) diff --git a/scraibe/tasks.py b/scraibe/tasks.py index acc4fd8..aa90c9a 100644 --- a/scraibe/tasks.py +++ b/scraibe/tasks.py @@ -14,6 +14,7 @@ from .summarizer import SummarizerClient, SummarizerError from .misc import setup_logging from .email_sender import send_email, EmailError, load_template from .email_sender import create_transcript_docx, create_summary_docx +from .email_sender import create_combined_docx logger = logging.getLogger("scraibe.tasks") @@ -405,6 +406,58 @@ def process_transcription_task( ) speaker_map = {} + # 3c) Generate short cover-page descriptions using summarizer + transcript_cover_desc = "" + summary_cover_desc = "" + today_str = datetime.utcnow().strftime("%Y-%m-%d") + + try: + scraibe._ensure_summarizer() + summarizer = scraibe._summarizer + + # Transcript description + prompt = ( + "In one short sentence (max 25 words), describe the content of this transcript " + "for use on a cover page. Do not include speaker names. Output only the sentence.\n\n" + + transcript_text + ) + response = summarizer._chat_completion( + messages=[{"role": "user", "content": prompt}], + temperature=0.3, + max_tokens=60, + ) + transcript_cover_desc = ( + (response or {}).get("choices", [{}])[0].get("message", {}).get("content", "") + ).strip() + if not transcript_cover_desc: + transcript_cover_desc = "Transcript of the recorded conversation." + logger.info("Transcript cover description: %s", transcript_cover_desc) + + # Summary description (if summary requested) + if include_summary: + prompt = ( + "In one short sentence (max 25 words), describe the content of this summary " + "for use on a cover page. Output only the sentence.\n\n" + + summary_text + ) + response = summarizer._chat_completion( + messages=[{"role": "user", "content": prompt}], + temperature=0.3, + max_tokens=60, + ) + summary_cover_desc = ( + (response or {}).get("choices", [{}])[0].get("message", {}).get("content", "") + ).strip() + if not summary_cover_desc: + summary_cover_desc = "Summary of the recorded conversation." + logger.info("Summary cover description: %s", summary_cover_desc) + + except (SummarizerError, Exception) as e: + logger.warning("Failed to generate cover descriptions: %s", e) + transcript_cover_desc = transcript_cover_desc or "Transcript of the recorded conversation." + if include_summary: + summary_cover_desc = summary_cover_desc or "Summary of the recorded conversation." + # 4) Prepare files # Transcript .md @@ -414,9 +467,15 @@ def process_transcription_task( f.write(transcript_text) temp_files.append(md_transcript_path) - # Transcript .docx + # Transcript .docx (standalone, with cover page) docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx") - create_transcript_docx(transcript_text, docx_transcript_path) + create_transcript_docx( + transcript_text, + docx_transcript_path, + include_cover=True, + cover_date=today_str, + cover_desc=transcript_cover_desc, + ) temp_files.append(docx_transcript_path) # JSON as SOURCE @@ -440,17 +499,42 @@ def process_transcription_task( temp_files.append(json_path) # Summary files (if present) + md_summary_path = None + docx_summary_path = None + docx_combined_path = None + if summary_text: + # Summary .md md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md") with open(md_summary_path, "w", encoding="utf-8") as f: f.write("# Summary\n\n") f.write(summary_text) temp_files.append(md_summary_path) + # Summary .docx (standalone, with cover page) docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx") - create_summary_docx(summary_text, docx_summary_path) + create_summary_docx( + summary_text, + docx_summary_path, + include_cover=True, + cover_date=today_str, + cover_desc=summary_cover_desc, + ) temp_files.append(docx_summary_path) + # Combined .docx: Transcript + Summary with cover pages + docx_combined_path = _safe_filename("TRANSCRIPT_AND_SUMMARY", local, date_tag, ".docx") + create_combined_docx( + transcript_text=transcript_text, + summary_text=summary_text, + filename=docx_combined_path, + transcript_cover_date=today_str, + transcript_cover_desc=transcript_cover_desc, + summary_cover_date=today_str, + summary_cover_desc=summary_cover_desc, + ) + temp_files.append(docx_combined_path) + attachments = [ md_transcript_path, docx_transcript_path, @@ -458,6 +542,8 @@ def process_transcription_task( ] if summary_text: attachments += [md_summary_path, docx_summary_path] + if docx_combined_path: + attachments.append(docx_combined_path) # 5) Send success email send_success_email(