Fix page breaks: insert after every 29 lines; wrap at 58 chars preserving whole words
Mirror and run GitLab CI / build (push) Waiting to run
Ruff / ruff (push) Waiting to run

- Insert a page break before any entry whose wrapped lines would push the page past 29 visual lines.
- Wrap content at 58 characters, keeping whole words together.
- Ensure no text is lost; all transcript text is included.
This commit is contained in:
admin
2026-06-19 15:32:31 +00:00
parent 2c0998579c
commit 49f3cdc407
+32 -44
View File
@@ -465,7 +465,7 @@ def create_transcript_docx(text: str, filename: str):
_create_transcript_section_properties(doc.sections[0]) _create_transcript_section_properties(doc.sections[0])
# Max characters per visual line (content only; total line including line number and spaces <= 60) # Max characters per visual line (content only; total line including line number and spaces <= 60)
max_chars = 54 max_chars = 58
# Lines per page before restarting numbering # Lines per page before restarting numbering
lines_per_page = 29 lines_per_page = 29
@@ -476,26 +476,18 @@ def create_transcript_docx(text: str, filename: str):
# Split transcript into logical lines # Split transcript into logical lines
logical_lines = text.strip().splitlines() logical_lines = text.strip().splitlines()
def ensure_new_page_if_needed(): def insert_page_break():
nonlocal line_number nonlocal line_number
if line_number >= lines_per_page: p_break = doc.add_paragraph()
# Insert a page break paragraph (no line number, no text) pPr = p_break._p.get_or_add_pPr()
p_break = doc.add_paragraph() for child in list(pPr):
pPr = p_break._p.get_or_add_pPr() tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag
if tag in ("tabs", "spacing", "ind"):
# Clear any inherited formatting pPr.remove(child)
for child in list(pPr): page_break = OxmlElement("w:pageBreak")
tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1")
if tag in ("tabs", "spacing", "ind"): pPr.append(page_break)
pPr.remove(child) line_number = 0
# Standard page break via paragraph property
page_break = OxmlElement("w:pageBreak")
page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1")
pPr.append(page_break)
# Reset line counter for new page
line_number = 0
for line in logical_lines: for line in logical_lines:
line = line.strip() line = line.strip()
@@ -512,7 +504,7 @@ def create_transcript_docx(text: str, filename: str):
label_text = "" label_text = ""
content = line.strip() content = line.strip()
# Split content into visual lines at word boundaries # Wrap content into visual lines (whole words, max_chars)
content_lines = [] content_lines = []
words = content.split() words = content.split()
current = "" current = ""
@@ -527,15 +519,12 @@ def create_transcript_docx(text: str, filename: str):
if current: if current:
content_lines.append(current) content_lines.append(current)
# First visual line: include label if present, ensuring total <= max_chars # Prepare first visual line with label if present
# If too long, split into multiple lines instead of dropping text. visual_lines = []
if content_lines: if content_lines:
ensure_new_page_if_needed()
first_content = content_lines.pop(0) first_content = content_lines.pop(0)
if label_text: if label_text:
prefix = label_text + " " prefix = label_text + " "
# If prefix + first_content is too long, wrap first_content
if len(prefix) + len(first_content) > max_chars: if len(prefix) + len(first_content) > max_chars:
allowed = max_chars - len(prefix) allowed = max_chars - len(prefix)
if allowed < 1: if allowed < 1:
@@ -545,17 +534,12 @@ def create_transcript_docx(text: str, filename: str):
if last_space > 0: if last_space > 0:
kept = candidate[:last_space] kept = candidate[:last_space]
rest = first_content[last_space:].strip() rest = first_content[last_space:].strip()
else: # no space found; break mid-word to preserve everything else:
kept = candidate kept = candidate
rest = first_content[allowed:].strip() rest = first_content[allowed:].strip()
first_line_text = prefix + kept visual_lines.append(prefix + kept)
line_number += 1
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
# Prepend any remaining text as continuation lines
if rest: if rest:
# Re-wrap the rest into content_lines
extra_words = rest.split() extra_words = rest.split()
new_lines = [] new_lines = []
buf = "" buf = ""
@@ -569,22 +553,26 @@ def create_transcript_docx(text: str, filename: str):
buf = ew buf = ew
if buf: if buf:
new_lines.append(buf) new_lines.append(buf)
# Insert these before existing content_lines
content_lines = new_lines + content_lines content_lines = new_lines + content_lines
else: else:
first_line_text = prefix + first_content visual_lines.append(prefix + first_content)
line_number += 1
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
else: else:
first_line_text = first_content visual_lines.append(first_content)
line_number += 1
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
# Subsequent visual lines: no label, just content visual_lines.extend(content_lines)
for cl in content_lines: else:
ensure_new_page_if_needed() # No content_lines but maybe label_text only
if label_text:
visual_lines.append(label_text)
# If adding these visual lines would exceed lines_per_page, insert page break first
if line_number + len(visual_lines) > lines_per_page:
insert_page_break()
# Write visual lines
for vl in visual_lines:
line_number += 1 line_number += 1
_add_transcript_paragraph(doc, cl, line_number=line_number) _add_transcript_paragraph(doc, vl, line_number=line_number)
# Add page numbers to footer: "X of Y" (bottom left) # Add page numbers to footer: "X of Y" (bottom left)
section = doc.sections[0] section = doc.sections[0]