diff --git a/scraibe/email_sender.py b/scraibe/email_sender.py index 9d2112b..d55d789 100644 --- a/scraibe/email_sender.py +++ b/scraibe/email_sender.py @@ -316,6 +316,7 @@ def _create_transcript_section_properties(section): """ Configure the section properties for transcript DOCX: - Margins: 1 inch all sides + - Single column layout - Line numbering: start=1, countBy=1, restart each page - Disable document grid to avoid off-by-one line numbering """ @@ -334,6 +335,12 @@ def _create_transcript_section_properties(section): _set_element_attr(pgMar, "footer", "720") _set_element_attr(pgMar, "gutter", "0") + # Ensure single column (no multi-column layout that can interfere with line numbering) + cols = sectPr.find(f"{{{W_NS}}}cols") + if cols is not None: + _set_element_attr(cols, "num", "1") + _set_element_attr(cols, "space", "720") + # Disable document grid to avoid Word counting phantom grid lines docGrid = sectPr.find(f"{{{W_NS}}}docGrid") if docGrid is not None: @@ -351,6 +358,7 @@ def _add_transcript_paragraph(doc, line_text): """ Add a single transcript line as a paragraph. If it matches timestamp + speaker format, format the label differently. + Uses single line spacing and zero before/after spacing so each paragraph = 1 line. """ line_text = line_text.strip() if not line_text: @@ -359,6 +367,15 @@ def _add_transcript_paragraph(doc, line_text): p = doc.add_paragraph() p.paragraph_format.left_indent = Inches(0.25) + # Enforce single line spacing with no extra before/after + pPr = p._p.get_or_add_pPr() + spacing = OxmlElement("w:spacing") + _set_element_attr(spacing, "before", "0") + _set_element_attr(spacing, "after", "0") + _set_element_attr(spacing, "line", "240") # 12pt in twips + _set_element_attr(spacing, "lineRule", "auto") + pPr.append(spacing) + # Try to match: [00:00] SPEAKER 1: content m = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", line_text) if m: