Use structured filenames and formal DOCX transcript styling
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 16:20:10 +00:00
parent 2dce9b43c9
commit a8f48b9e58
2 changed files with 121 additions and 27 deletions
+71 -13
View File
@@ -9,6 +9,7 @@ Template placeholders are primarily filled via environment variables.
import base64
import os
import re
import smtplib
import logging
from email import encoders
@@ -17,6 +18,11 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from typing import List, Optional, Dict, Any
from docx import Document
from docx.shared import Inches, Pt
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
logger = logging.getLogger("scraibe.email_sender")
@@ -280,30 +286,82 @@ def send_email(
def _add_courier_run(paragraph, content: str, bold: bool = False):
    """Append *content* to *paragraph* as a 12pt Courier run and return the run."""
    run = paragraph.add_run(content)
    run.bold = bold
    run.font.name = "Courier"
    run.font.size = Pt(12)
    return run


def _enable_line_numbering(section) -> None:
    """Turn on continuous, count-by-one line numbering for *section*."""
    # python-docx has no public API for line numbering, so the section
    # properties element (<w:sectPr>) is edited directly.
    sect_pr = section._sectPr

    # Drop any existing setting so repeated calls do not stack elements.
    for stale in sect_pr.findall(qn("w:lnNumType")):
        sect_pr.remove(stale)

    ln_num = OxmlElement("w:lnNumType")
    ln_num.set(qn("w:countBy"), "1")
    ln_num.set(qn("w:restart"), "continuous")

    # Children of <w:sectPr> are an ordered sequence in the schema, so the new
    # element goes in front of the first element that must follow it.
    successor_tags = (
        "w:pgNumType",
        "w:cols",
        "w:formProt",
        "w:vAlign",
        "w:noEndnote",
        "w:titlePg",
        "w:textDirection",
        "w:bidi",
        "w:rtlGutter",
        "w:docGrid",
        "w:printerSettings",
        "w:sectPrChange",
    )
    for tag in successor_tags:
        successor = sect_pr.find(qn(tag))
        if successor is not None:
            successor.addprevious(ln_num)
            return
    sect_pr.append(ln_num)


def create_transcript_docx(text: str, filename: str) -> None:
    """
    Create a .docx transcript with:
    - 1.5" left margin, 1" right, top and bottom margins
    - 12pt Courier
    - Continuous line numbering on the left
    - Speaker names capitalized and indented; spoken text further indented

    Lines shaped like ``[MM:SS] Speaker: words`` (or ``[HH:MM:SS] ...``) are
    split into a bold ``[timestamp] SPEAKER`` paragraph followed by a paragraph
    holding the spoken text. Any other non-blank line is written unchanged as
    its own paragraph. Blank lines are skipped.

    Args:
        text: Transcript text, one utterance per line.
        filename: Path the .docx document is saved to.
    """
    doc = Document()
    doc.add_heading("Transcript", level=1)
    section = doc.sections[0]

    # Margins
    section.left_margin = Inches(1.5)
    section.right_margin = Inches(1.0)
    section.top_margin = Inches(1.0)
    section.bottom_margin = Inches(1.0)

    # Line numbering (continuous, left side)
    _enable_line_numbering(section)

    # Default font for anything that does not get explicit run formatting
    normal_font = doc.styles["Normal"].font
    normal_font.name = "Courier"
    normal_font.size = Pt(12)

    # [00:00] SPEAKER: text  -- compiled once, reused for every line
    utterance_re = re.compile(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)")

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        parsed = utterance_re.match(line)
        if parsed is None:
            # Fallback for non-standard lines
            _add_courier_run(doc.add_paragraph(), line)
            continue

        timestamp, speaker, spoken = parsed.groups()

        # Speaker line
        speaker_par = doc.add_paragraph()
        speaker_par.paragraph_format.left_indent = Inches(0.25)
        _add_courier_run(speaker_par, f"[{timestamp}] {speaker.upper()}", bold=True)

        # Spoken text line
        spoken_par = doc.add_paragraph()
        spoken_par.paragraph_format.left_indent = Inches(0.5)
        _add_courier_run(spoken_par, spoken.strip())

    doc.save(filename)
def create_summary_docx(text: str, filename: str):
"""
Create a .docx file from summary text.
Create a .docx summary with consistent font.
"""
from docx import Document
from docx.shared import Pt
doc = Document()
doc.add_heading("Summary", level=1)
style = doc.styles["Normal"]
font = style.font
font.name = "Courier"
font.size = Pt(12)
for line in text.splitlines():
p = doc.add_paragraph(line)