Use structured filenames and formal DOCX transcript styling
This commit is contained in:
+71
-13
@@ -9,6 +9,7 @@ Template placeholders are primarily filled via environment variables.
|
||||
|
||||
import base64
|
||||
import os
|
||||
import re
|
||||
import smtplib
|
||||
import logging
|
||||
from email import encoders
|
||||
@@ -17,6 +18,11 @@ from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
from docx import Document
|
||||
from docx.shared import Inches, Pt
|
||||
from docx.oxml.ns import qn
|
||||
from docx.oxml import OxmlElement
|
||||
|
||||
logger = logging.getLogger("scraibe.email_sender")
|
||||
|
||||
|
||||
@@ -280,30 +286,82 @@ def send_email(
|
||||
|
||||
# Matches "[MM:SS] Speaker: text" or "[HH:MM:SS] Speaker: text".
_TRANSCRIPT_LINE_RE = re.compile(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)")

# Children of <w:sectPr> that the WordprocessingML schema places AFTER
# <w:lnNumType>; used to insert the line-numbering element in a valid position.
_LN_NUM_SUCCESSORS = (
    "w:pgNumType",
    "w:cols",
    "w:formProt",
    "w:vAlign",
    "w:noEndnote",
    "w:titlePg",
    "w:textDirection",
    "w:bidi",
    "w:rtlGutter",
    "w:docGrid",
    "w:printerSettings",
    "w:sectPrChange",
)

_TRANSCRIPT_FONT_NAME = "Courier"
_TRANSCRIPT_FONT_SIZE_PT = 12


def _enable_line_numbering(section) -> None:
    """Turn on continuous, count-by-one line numbering for *section*."""
    # NOTE(review): python-docx exposes no public API for line numbering, so
    # this reaches for the section's underlying <w:sectPr> element directly.
    sect_pr = section._sectPr

    ln_num = sect_pr.find(qn("w:lnNumType"))
    if ln_num is None:
        ln_num = OxmlElement("w:lnNumType")
        # Insert before the first schema successor that is present so the
        # child order of <w:sectPr> stays valid; otherwise append at the end.
        for tag in _LN_NUM_SUCCESSORS:
            anchor = sect_pr.find(qn(tag))
            if anchor is not None:
                anchor.addprevious(ln_num)
                break
        else:
            sect_pr.append(ln_num)

    ln_num.set(qn("w:countBy"), "1")
    # "continuous" is a value of w:restart (w:start expects an integer).
    ln_num.set(qn("w:restart"), "continuous")


def _add_transcript_run(paragraph, run_text: str, bold: bool = False):
    """Append *run_text* to *paragraph* as a 12pt Courier run and return it."""
    run = paragraph.add_run(run_text)
    if bold:
        run.bold = True
    run.font.name = _TRANSCRIPT_FONT_NAME
    run.font.size = Pt(_TRANSCRIPT_FONT_SIZE_PT)
    return run


def create_transcript_docx(text: str, filename: str) -> None:
    """
    Create a .docx transcript with:
    - 1.5" left margin, 1" right/top/bottom margins
    - 12pt Courier
    - Continuous line numbering
    - Speaker names upper-cased and indented; spoken text further indented

    Args:
        text: Transcript text. Lines shaped like ``[MM:SS] Speaker: words``
            (or ``[HH:MM:SS] ...``) are split into a bold speaker paragraph
            and an indented content paragraph; any other non-blank line is
            written as-is. Blank lines are skipped.
        filename: Destination passed to ``Document.save``.
    """
    doc = Document()
    section = doc.sections[0]

    # Margins
    section.left_margin = Inches(1.5)
    section.right_margin = Inches(1.0)
    section.top_margin = Inches(1.0)
    section.bottom_margin = Inches(1.0)

    # Line numbering (continuous)
    _enable_line_numbering(section)

    # Default font
    normal_font = doc.styles["Normal"].font
    normal_font.name = _TRANSCRIPT_FONT_NAME
    normal_font.size = Pt(_TRANSCRIPT_FONT_SIZE_PT)

    for raw_line in text.strip().split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        match = _TRANSCRIPT_LINE_RE.match(line)
        if match is None:
            # Fallback for lines that do not follow "[ts] SPEAKER: text".
            _add_transcript_run(doc.add_paragraph(), line)
            continue

        ts, speaker, content = match.groups()

        # Speaker line
        speaker_par = doc.add_paragraph()
        speaker_par.paragraph_format.left_indent = Inches(0.25)
        _add_transcript_run(speaker_par, f"[{ts}] {speaker.upper()}", bold=True)

        # Spoken text line
        content_par = doc.add_paragraph()
        content_par.paragraph_format.left_indent = Inches(0.5)
        _add_transcript_run(content_par, content.strip())

    doc.save(filename)
|
||||
|
||||
|
||||
def create_summary_docx(text: str, filename: str):
|
||||
"""
|
||||
Create a .docx file from summary text.
|
||||
Create a .docx summary with consistent font.
|
||||
"""
|
||||
from docx import Document
|
||||
from docx.shared import Pt
|
||||
|
||||
doc = Document()
|
||||
doc.add_heading("Summary", level=1)
|
||||
style = doc.styles["Normal"]
|
||||
font = style.font
|
||||
font.name = "Courier"
|
||||
font.size = Pt(12)
|
||||
|
||||
for line in text.splitlines():
|
||||
p = doc.add_paragraph(line)
|
||||
|
||||
Reference in New Issue
Block a user