Fix page breaks: insert after every 29 lines; wrap at 58 chars preserving whole words
- Insert page break after every 29 visual lines.
- Wrap content at 58 characters, keeping whole words together.
- Ensure no text is lost; all transcript text is included.
This commit is contained in:
+32
-44
@@ -465,7 +465,7 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
_create_transcript_section_properties(doc.sections[0])
|
_create_transcript_section_properties(doc.sections[0])
|
||||||
|
|
||||||
# Max characters per visual line (content only; total line including line number and spaces <= 60)
|
# Max characters per visual line (content only; total line including line number and spaces <= 60)
|
||||||
max_chars = 54
|
max_chars = 58
|
||||||
|
|
||||||
# Lines per page before restarting numbering
|
# Lines per page before restarting numbering
|
||||||
lines_per_page = 29
|
lines_per_page = 29
|
||||||
@@ -476,26 +476,18 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
# Split transcript into logical lines
|
# Split transcript into logical lines
|
||||||
logical_lines = text.strip().splitlines()
|
logical_lines = text.strip().splitlines()
|
||||||
|
|
||||||
def ensure_new_page_if_needed():
|
def insert_page_break():
|
||||||
nonlocal line_number
|
nonlocal line_number
|
||||||
if line_number >= lines_per_page:
|
p_break = doc.add_paragraph()
|
||||||
# Insert a page break paragraph (no line number, no text)
|
pPr = p_break._p.get_or_add_pPr()
|
||||||
p_break = doc.add_paragraph()
|
for child in list(pPr):
|
||||||
pPr = p_break._p.get_or_add_pPr()
|
tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag
|
||||||
|
if tag in ("tabs", "spacing", "ind"):
|
||||||
# Clear any inherited formatting
|
pPr.remove(child)
|
||||||
for child in list(pPr):
|
page_break = OxmlElement("w:pageBreak")
|
||||||
tag = child.tag.split("}")[-1] if "}" in child.tag else child.tag
|
page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1")
|
||||||
if tag in ("tabs", "spacing", "ind"):
|
pPr.append(page_break)
|
||||||
pPr.remove(child)
|
line_number = 0
|
||||||
|
|
||||||
# Standard page break via paragraph property
|
|
||||||
page_break = OxmlElement("w:pageBreak")
|
|
||||||
page_break.set("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val", "1")
|
|
||||||
pPr.append(page_break)
|
|
||||||
|
|
||||||
# Reset line counter for new page
|
|
||||||
line_number = 0
|
|
||||||
|
|
||||||
for line in logical_lines:
|
for line in logical_lines:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
@@ -512,7 +504,7 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
label_text = ""
|
label_text = ""
|
||||||
content = line.strip()
|
content = line.strip()
|
||||||
|
|
||||||
# Split content into visual lines at word boundaries
|
# Wrap content into visual lines (whole words, max_chars)
|
||||||
content_lines = []
|
content_lines = []
|
||||||
words = content.split()
|
words = content.split()
|
||||||
current = ""
|
current = ""
|
||||||
@@ -527,15 +519,12 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
if current:
|
if current:
|
||||||
content_lines.append(current)
|
content_lines.append(current)
|
||||||
|
|
||||||
# First visual line: include label if present, ensuring total <= max_chars
|
# Prepare first visual line with label if present
|
||||||
# If too long, split into multiple lines instead of dropping text.
|
visual_lines = []
|
||||||
if content_lines:
|
if content_lines:
|
||||||
ensure_new_page_if_needed()
|
|
||||||
|
|
||||||
first_content = content_lines.pop(0)
|
first_content = content_lines.pop(0)
|
||||||
if label_text:
|
if label_text:
|
||||||
prefix = label_text + " "
|
prefix = label_text + " "
|
||||||
# If prefix + first_content is too long, wrap first_content
|
|
||||||
if len(prefix) + len(first_content) > max_chars:
|
if len(prefix) + len(first_content) > max_chars:
|
||||||
allowed = max_chars - len(prefix)
|
allowed = max_chars - len(prefix)
|
||||||
if allowed < 1:
|
if allowed < 1:
|
||||||
@@ -545,17 +534,12 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
if last_space > 0:
|
if last_space > 0:
|
||||||
kept = candidate[:last_space]
|
kept = candidate[:last_space]
|
||||||
rest = first_content[last_space:].strip()
|
rest = first_content[last_space:].strip()
|
||||||
else: # no space found; break mid-word to preserve everything
|
else:
|
||||||
kept = candidate
|
kept = candidate
|
||||||
rest = first_content[allowed:].strip()
|
rest = first_content[allowed:].strip()
|
||||||
|
|
||||||
first_line_text = prefix + kept
|
visual_lines.append(prefix + kept)
|
||||||
line_number += 1
|
|
||||||
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
|
|
||||||
|
|
||||||
# Prepend any remaining text as continuation lines
|
|
||||||
if rest:
|
if rest:
|
||||||
# Re-wrap the rest into content_lines
|
|
||||||
extra_words = rest.split()
|
extra_words = rest.split()
|
||||||
new_lines = []
|
new_lines = []
|
||||||
buf = ""
|
buf = ""
|
||||||
@@ -569,22 +553,26 @@ def create_transcript_docx(text: str, filename: str):
|
|||||||
buf = ew
|
buf = ew
|
||||||
if buf:
|
if buf:
|
||||||
new_lines.append(buf)
|
new_lines.append(buf)
|
||||||
# Insert these before existing content_lines
|
|
||||||
content_lines = new_lines + content_lines
|
content_lines = new_lines + content_lines
|
||||||
else:
|
else:
|
||||||
first_line_text = prefix + first_content
|
visual_lines.append(prefix + first_content)
|
||||||
line_number += 1
|
|
||||||
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
|
|
||||||
else:
|
else:
|
||||||
first_line_text = first_content
|
visual_lines.append(first_content)
|
||||||
line_number += 1
|
|
||||||
_add_transcript_paragraph(doc, first_line_text, line_number=line_number)
|
|
||||||
|
|
||||||
# Subsequent visual lines: no label, just content
|
visual_lines.extend(content_lines)
|
||||||
for cl in content_lines:
|
else:
|
||||||
ensure_new_page_if_needed()
|
# No content_lines but maybe label_text only
|
||||||
|
if label_text:
|
||||||
|
visual_lines.append(label_text)
|
||||||
|
|
||||||
|
# If adding these visual lines would exceed lines_per_page, insert page break first
|
||||||
|
if line_number + len(visual_lines) > lines_per_page:
|
||||||
|
insert_page_break()
|
||||||
|
|
||||||
|
# Write visual lines
|
||||||
|
for vl in visual_lines:
|
||||||
line_number += 1
|
line_number += 1
|
||||||
_add_transcript_paragraph(doc, cl, line_number=line_number)
|
_add_transcript_paragraph(doc, vl, line_number=line_number)
|
||||||
|
|
||||||
# Add page numbers to footer: "X of Y" (bottom left)
|
# Add page numbers to footer: "X of Y" (bottom left)
|
||||||
section = doc.sections[0]
|
section = doc.sections[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user