""" Utility module for applying styles and converting simple markdown into styled DOCX paragraphs/runs for summaries. """ import re from docx import Document from docx.shared import Pt from docx.oxml import OxmlElement from docx.oxml.ns import qn def _ensure_style(doc, name, based_on="Normal", font_name="Courier", font_size=Pt(12)): """ Ensure a paragraph style exists in the document. """ styles = doc.styles if name not in [s.name for s in styles]: style = styles.add_style(name, 1) # 1 = WD_STYLE_TYPE.PARAGRAPH style.font.name = font_name style.font.size = font_size if based_on: style.base_style = styles[based_on] return styles[name] def apply_heading_style(doc, paragraph, level: int): """ Apply heading style to a paragraph based on level (1, 2, 3). """ if level == 1: style_name = "SummaryHeading1" size = Pt(16) elif level == 2: style_name = "SummaryHeading2" size = Pt(14) else: style_name = "SummaryHeading3" size = Pt(12) style = _ensure_style(doc, style_name, font_size=size) paragraph.style = style paragraph.paragraph_format.space_before = Pt(4) paragraph.paragraph_format.space_after = Pt(2) def apply_bullet_style(doc, paragraph): """ Apply a simple bullet style to a paragraph. """ style_name = "SummaryBullet" style = _ensure_style(doc, style_name) paragraph.style = style pPr = paragraph._p.get_or_add_pPr() tabs = OxmlElement("w:tabs") tab = OxmlElement("w:tab") tab.set(qn("w:val"), "left") tab.set(qn("w:pos"), "360") tabs.append(tab) pPr.append(tabs) def parse_simple_md_to_paragraphs(doc, text: str): """ Convert simple markdown text into DOCX paragraphs with styles. Supported: - # / ## / ### for headings - - / * for bullet lists - **bold** and *italic* This is intentionally simple and robust for legal/business summaries. """ lines = text.splitlines() current_paragraph = None in_list = False for line in lines: stripped = line.strip() if not stripped: current_paragraph = None in_list = False continue # Headings heading_match = re.match(r"^(#{1,3})\s+(.*)", stripped) if heading_match: level = len(heading_match.group(1)) content = heading_match.group(2).strip() p = doc.add_paragraph() apply_heading_style(doc, p, level) _add_run_with_inline_md(p, content) current_paragraph = p in_list = False continue # Bullet list bullet_match = re.match(r"^[-*]\s+(.*)", stripped) if bullet_match: content = bullet_match.group(1).strip() if not in_list or current_paragraph is None: in_list = True current_paragraph = doc.add_paragraph() apply_bullet_style(doc, current_paragraph) else: current_paragraph = doc.add_paragraph() apply_bullet_style(doc, current_paragraph) _add_run_with_inline_md(current_paragraph, content) continue # Normal paragraph if not in_list or current_paragraph is None: in_list = False current_paragraph = doc.add_paragraph() else: current_paragraph = doc.add_paragraph() _add_run_with_inline_md(current_paragraph, stripped) def _add_run_with_inline_md(paragraph, text: str): """ Add runs to a paragraph, interpreting **bold** and *italic*. """ # Simple regex for bold and italic parts = re.split(r"(\*\*\*.*?\*\*\*|\*\*.*?\*\*|\*.*?\*)", text) for part in parts: if not part: continue run = paragraph.add_run(part) run.font.name = "Courier" run.font.size = Pt(12) # Bold bold_match = re.fullmatch(r"\*\*(.+?)\*\*", part) if bold_match: run.bold = True part = bold_match.group(1) # Italic italic_match = re.fullmatch(r"\*(.+?)\*", part) if italic_match: run.italic = True part = italic_match.group(1) run.text = part