Fix transcript DOCX line numbering (spacing and column fixes)
- Ensure single column layout (cols num='1')
- Set explicit single line spacing (before/after=0, line=240 twips)
- Prevents Word from counting extra lines due to spacing/columns
This commit is contained in:
@@ -316,6 +316,7 @@ def _create_transcript_section_properties(section):
|
|||||||
"""
|
"""
|
||||||
Configure the section properties for transcript DOCX:
|
Configure the section properties for transcript DOCX:
|
||||||
- Margins: 1 inch all sides
|
- Margins: 1 inch all sides
|
||||||
|
- Single column layout
|
||||||
- Line numbering: start=1, countBy=1, restart each page
|
- Line numbering: start=1, countBy=1, restart each page
|
||||||
- Disable document grid to avoid off-by-one line numbering
|
- Disable document grid to avoid off-by-one line numbering
|
||||||
"""
|
"""
|
||||||
@@ -334,6 +335,12 @@ def _create_transcript_section_properties(section):
|
|||||||
_set_element_attr(pgMar, "footer", "720")
|
_set_element_attr(pgMar, "footer", "720")
|
||||||
_set_element_attr(pgMar, "gutter", "0")
|
_set_element_attr(pgMar, "gutter", "0")
|
||||||
|
|
||||||
|
# Ensure single column (no multi-column layout that can interfere with line numbering)
|
||||||
|
cols = sectPr.find(f"{{{W_NS}}}cols")
|
||||||
|
if cols is not None:
|
||||||
|
_set_element_attr(cols, "num", "1")
|
||||||
|
_set_element_attr(cols, "space", "720")
|
||||||
|
|
||||||
# Disable document grid to avoid Word counting phantom grid lines
|
# Disable document grid to avoid Word counting phantom grid lines
|
||||||
docGrid = sectPr.find(f"{{{W_NS}}}docGrid")
|
docGrid = sectPr.find(f"{{{W_NS}}}docGrid")
|
||||||
if docGrid is not None:
|
if docGrid is not None:
|
||||||
@@ -351,6 +358,7 @@ def _add_transcript_paragraph(doc, line_text):
|
|||||||
"""
|
"""
|
||||||
Add a single transcript line as a paragraph.
|
Add a single transcript line as a paragraph.
|
||||||
If it matches timestamp + speaker format, format the label differently.
|
If it matches timestamp + speaker format, format the label differently.
|
||||||
|
Uses single line spacing and zero before/after spacing so each paragraph = 1 line.
|
||||||
"""
|
"""
|
||||||
line_text = line_text.strip()
|
line_text = line_text.strip()
|
||||||
if not line_text:
|
if not line_text:
|
||||||
@@ -359,6 +367,15 @@ def _add_transcript_paragraph(doc, line_text):
|
|||||||
p = doc.add_paragraph()
|
p = doc.add_paragraph()
|
||||||
p.paragraph_format.left_indent = Inches(0.25)
|
p.paragraph_format.left_indent = Inches(0.25)
|
||||||
|
|
||||||
|
# Enforce single line spacing with no extra before/after
|
||||||
|
pPr = p._p.get_or_add_pPr()
|
||||||
|
spacing = OxmlElement("w:spacing")
|
||||||
|
_set_element_attr(spacing, "before", "0")
|
||||||
|
_set_element_attr(spacing, "after", "0")
|
||||||
|
_set_element_attr(spacing, "line", "240") # single spacing: with lineRule="auto" the value is in 240ths of a line
|
||||||
|
_set_element_attr(spacing, "lineRule", "auto")
|
||||||
|
pPr.append(spacing)
|
||||||
|
|
||||||
# Try to match: [00:00] SPEAKER 1: content
|
# Try to match: [00:00] SPEAKER 1: content
|
||||||
m = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", line_text)
|
m = re.match(r"\[(\d+:\d+(?::\d+)?)\]\s*(.+?):\s*(.*)", line_text)
|
||||||
if m:
|
if m:
|
||||||
|
|||||||
Reference in New Issue
Block a user