4bc9f82ee7
- Confirmed MCP server endpoints and /transcribe flow. - Confirmed watcher audio detection logic. - Confirmed summarizer prompt loading and env override. - Confirmed docx_styles markdown-to-DOCX conversion. - Confirmed docx_cover integration. - Confirmed email_sender with cover pages and markdown styling. - Confirmed tasks and __main__ wiring.
148 lines
4.2 KiB
Python
148 lines
4.2 KiB
Python
"""
|
|
Utility module for applying styles and converting simple markdown
|
|
into styled DOCX paragraphs/runs for summaries.
|
|
"""
|
|
|
|
import re
|
|
from docx import Document
|
|
from docx.shared import Pt
|
|
from docx.oxml import OxmlElement
|
|
from docx.oxml.ns import qn
|
|
|
|
|
|
def _ensure_style(doc, name, based_on="Normal", font_name="Courier", font_size=Pt(12)):
    """
    Return the paragraph style called ``name``, creating it first if needed.

    A newly created style receives ``font_name`` and ``font_size`` and, when
    ``based_on`` is truthy, is based on that already-existing style.  A style
    that is already present in the document is returned untouched, so the
    font arguments are ignored in that case.
    """
    style_table = doc.styles
    already_defined = any(existing.name == name for existing in style_table)
    if not already_defined:
        created = style_table.add_style(name, 1)  # 1 = WD_STYLE_TYPE.PARAGRAPH
        created.font.name = font_name
        created.font.size = font_size
        if based_on:
            created.base_style = style_table[based_on]
    return style_table[name]
|
|
|
|
|
|
def apply_heading_style(doc, paragraph, level: int):
    """
    Style ``paragraph`` as a summary heading.

    Level 1 uses "SummaryHeading1" at 16pt, level 2 uses "SummaryHeading2"
    at 14pt, and every other level uses "SummaryHeading3" at 12pt.  The
    style is looked up (or created) in ``doc``, and the paragraph is given
    4pt of space before and 2pt of space after.
    """
    # Pick the (style name, font size) pair; anything that is not 1 or 2
    # is treated as a level-3 heading.
    heading_name, heading_size = (
        ("SummaryHeading1", Pt(16)) if level == 1
        else ("SummaryHeading2", Pt(14)) if level == 2
        else ("SummaryHeading3", Pt(12))
    )

    paragraph.style = _ensure_style(doc, heading_name, font_size=heading_size)

    spacing = paragraph.paragraph_format
    spacing.space_before = Pt(4)
    spacing.space_after = Pt(2)
|
|
|
|
|
|
def apply_bullet_style(doc, paragraph):
    """
    Give ``paragraph`` the "SummaryBullet" style plus a left tab stop.

    The style is looked up (or created) in ``doc``.  A ``w:tabs`` element
    holding a single left-aligned tab at position 360 is then appended to
    the paragraph properties.  No bullet character is inserted here.
    """
    paragraph.style = _ensure_style(doc, "SummaryBullet")

    # Hand-build <w:tabs><w:tab w:val="left" w:pos="360"/></w:tabs>.
    tab_stop = OxmlElement("w:tab")
    for attribute, value in (("w:val", "left"), ("w:pos", "360")):
        tab_stop.set(qn(attribute), value)
    tab_stops = OxmlElement("w:tabs")
    tab_stops.append(tab_stop)

    paragraph._p.get_or_add_pPr().append(tab_stops)
|
|
|
|
|
|
def parse_simple_md_to_paragraphs(doc, text: str):
    """
    Convert simple markdown text into DOCX paragraphs with styles.

    Supported:
    - # / ## / ### for headings
    - - / * for bullet lists
    - **bold** and *italic*

    Every non-blank line becomes its own paragraph appended to ``doc``;
    blank lines are skipped.  Lines that match neither the heading nor the
    bullet pattern are added as plain paragraphs.

    This is intentionally simple and robust for legal/business summaries.

    Args:
        doc: Document object providing ``add_paragraph()`` and ``styles``.
        text: Markdown source, one block element per line.  Empty or
            ``None`` input adds nothing.

    Returns:
        None.  Paragraphs are appended to ``doc`` in place.
    """
    for raw_line in (text or "").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue

        # Headings
        heading_match = re.match(r"^(#{1,3})\s+(.*)", stripped)
        if heading_match:
            level = len(heading_match.group(1))
            paragraph = doc.add_paragraph()
            apply_heading_style(doc, paragraph, level)
            _add_run_with_inline_md(paragraph, heading_match.group(2).strip())
            # The inline helper puts an explicit font size on every run, and
            # run-level formatting takes precedence over the paragraph
            # style.  Re-apply the heading style's size so level 1/2
            # headings are not flattened to body size.
            heading_size = paragraph.style.font.size
            if heading_size is not None:
                for run in paragraph.runs:
                    run.font.size = heading_size
            continue

        # Bullet list: each item is its own bullet-styled paragraph.
        bullet_match = re.match(r"^[-*]\s+(.*)", stripped)
        if bullet_match:
            paragraph = doc.add_paragraph()
            apply_bullet_style(doc, paragraph)
            _add_run_with_inline_md(paragraph, bullet_match.group(1).strip())
            continue

        # Normal paragraph
        paragraph = doc.add_paragraph()
        _add_run_with_inline_md(paragraph, stripped)
|
|
|
|
|
|
def _add_run_with_inline_md(paragraph, text: str, *, font_name="Courier", font_size=None):
    """
    Add runs to a paragraph, interpreting **bold** and *italic*.

    ``text`` is split into emphasised and plain spans; each non-empty span
    becomes one run.  A span wrapped in ``**`` is bold, a span wrapped in
    ``*`` is italic, and ``***text***`` ends up both (the bold markers are
    peeled first, then the remaining ``*text*`` is matched as italic).

    Args:
        paragraph: Paragraph object providing ``add_run(text)``.
        text: Single line of text with optional inline markers.
        font_name: Font applied to every run (keyword-only).
        font_size: Size applied to every run (keyword-only); ``None``
            means 12pt.

    Returns:
        None.  Runs are appended to ``paragraph`` in place.
    """
    if font_size is None:
        font_size = Pt(12)

    # The capturing group makes re.split keep the emphasised spans in the
    # result instead of discarding them as separators.
    parts = re.split(r"(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*)", text)
    for part in parts:
        if not part:
            continue

        # Strip the markers before creating the run so the run text is
        # written exactly once.
        bold_match = re.fullmatch(r"\*\*(.+?)\*\*", part)
        if bold_match:
            part = bold_match.group(1)

        italic_match = re.fullmatch(r"\*(.+?)\*", part)
        if italic_match:
            part = italic_match.group(1)

        run = paragraph.add_run(part)
        run.font.name = font_name
        run.font.size = font_size
        if bold_match:
            run.bold = True
        if italic_match:
            run.italic = True
|