From 49f3cdc407b74aecf91eeb7588930d4a496ac004 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 19 Jun 2026 15:32:31 +0000 Subject: [PATCH] Fix page breaks: insert after every 29 lines; wrap at 58 chars preserving whole words - Insert page break after every 29 visual lines. - Wrap content at 58 characters, keeping whole words together. - Ensure no text is lost; all transcript text is included. --- scraibe/email_sender.py | 76 +++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/scraibe/email_sender.py b/scraibe/email_sender.py index e622fd9..2277186 100644 --- a/scraibe/email_sender.py +++ b/scraibe/email_sender.py @@ -465,7 +465,7 @@ def create_transcript_docx(text: str, filename: str): _create_transcript_section_properties(doc.sections[0]) # Max characters per visual line (content only; total line including line number and spaces <= 60) - max_chars = 54 + max_chars = 58 # Lines per page before restarting numbering lines_per_page = 29 @@ -476,26 +476,18 @@ def create_transcript_docx(text: str, filename: str): # Split transcript into logical lines logical_lines = text.strip().splitlines() - def ensure_new_page_if_needed(): + def insert_page_break(): nonlocal line_number - if line_number >= lines_per_page: - # Insert a page break paragraph (no line number, no text) - p_break = doc.add_paragraph() - pPr = p_break._p.get_or_add_pPr() - - # Clear any inherited formatting - for child in list(pPr): - tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag - if tag in ("tabs", "spacing", "ind"): - pPr.remove(child) - - # Standard page break via paragraph property - page_break = OxmlElement("w:pageBreak") - page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1") - pPr.append(page_break) - - # Reset line counter for new page - line_number = 0 + p_break = doc.add_paragraph() + pPr = p_break._p.get_or_add_pPr() + for child in list(pPr): + tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag + if tag in ("tabs", "spacing", "ind"): + pPr.remove(child) + page_break = OxmlElement("w:pageBreak") + page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1") + pPr.append(page_break) + line_number = 0 for line in logical_lines: line = line.strip() @@ -512,7 +504,7 @@ def create_transcript_docx(text: str, filename: str): label_text = "" content = line.strip() - # Split content into visual lines at word boundaries + # Wrap content into visual lines (whole words, max_chars) content_lines = [] words = content.split() current = "" @@ -527,15 +519,12 @@ def create_transcript_docx(text: str, filename: str): if current: content_lines.append(current) - # First visual line: include label if present, ensuring total <= max_chars - # If too long, split into multiple lines instead of dropping text. + # Prepare first visual line with label if present + visual_lines = [] if content_lines: - ensure_new_page_if_needed() - first_content = content_lines.pop(0) if label_text: prefix = label_text + " " - # If prefix + first_content is too long, wrap first_content if len(prefix) + len(first_content) > max_chars: allowed = max_chars - len(prefix) if allowed < 1: @@ -545,17 +534,12 @@ def create_transcript_docx(text: str, filename: str): if last_space > 0: kept = candidate[:last_space] rest = first_content[last_space:].strip() - else: # no space found; break mid-word to preserve everything + else: kept = candidate rest = first_content[allowed:].strip() - first_line_text = prefix + kept - line_number += 1 - _add_transcript_paragraph(doc, first_line_text, line_number=line_number) - - # Prepend any remaining text as continuation lines + visual_lines.append(prefix + kept) if rest: - # Re-wrap the rest into content_lines extra_words = rest.split() new_lines = [] buf = "" @@ -569,22 +553,26 @@ def create_transcript_docx(text: str, filename: str): buf = ew if buf: new_lines.append(buf) - # Insert these before existing content_lines content_lines = new_lines + content_lines else: - first_line_text = prefix + first_content - line_number += 1 - _add_transcript_paragraph(doc, first_line_text, line_number=line_number) + visual_lines.append(prefix + first_content) else: - first_line_text = first_content - line_number += 1 - _add_transcript_paragraph(doc, first_line_text, line_number=line_number) + visual_lines.append(first_content) - # Subsequent visual lines: no label, just content - for cl in content_lines: - ensure_new_page_if_needed() + visual_lines.extend(content_lines) + else: + # No content_lines but maybe label_text only + if label_text: + visual_lines.append(label_text) + + # If adding these visual lines would exceed lines_per_page, insert page break first + if line_number + len(visual_lines) > lines_per_page: + insert_page_break() + + # Write visual lines + for vl in visual_lines: line_number += 1 - _add_transcript_paragraph(doc, cl, line_number=line_number) + _add_transcript_paragraph(doc, vl, line_number=line_number) # Add page numbers to footer: "X of Y" (bottom left) section = doc.sections[0]