Fix combined .docx: line numbering only for transcript, centered cover pages, correct date format, reliable page numbering
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 22:07:36 +00:00
parent 4651c5f8b2
commit e0d2fd6963
2 changed files with 118 additions and 51 deletions
+116 -50
View File
@@ -291,21 +291,28 @@ def send_email(
raise EmailError(f"Failed to send email: {e}") raise EmailError(f"Failed to send email: {e}")
def _setup_docx_style(doc): def _setup_docx_style(doc, enable_line_numbering=False):
"""
Base document setup (margins, font, footer).
Line numbering is optional and applied to the first section only.
"""
section = doc.sections[0] section = doc.sections[0]
section.left_margin = Inches(1.5) section.left_margin = Inches(1.5)
section.right_margin = Inches(1.0) section.right_margin = Inches(1.0)
section.top_margin = Inches(1.0) section.top_margin = Inches(1.0)
section.bottom_margin = Inches(1.0) section.bottom_margin = Inches(1.0)
sectPr = section._sectPr # Line numbering (only for transcript sections)
lnNumType = sectPr.find(qn("w:lnNumType")) if enable_line_numbering:
if lnNumType is None: sectPr = section._sectPr
lnNumType = OxmlElement("w:lnNumType") lnNumType = sectPr.find(qn("w:lnNumType"))
sectPr.append(lnNumType) if lnNumType is None:
lnNumType.set(qn("w:start"), "continuous") lnNumType = OxmlElement("w:lnNumType")
lnNumType.set(qn("w:countBy"), "1") sectPr.append(lnNumType)
lnNumType.set(qn("w:start"), "continuous")
lnNumType.set(qn("w:countBy"), "1")
# Default font
style = doc.styles["Normal"] style = doc.styles["Normal"]
font = style.font font = style.font
font.name = "Courier" font.name = "Courier"
@@ -316,68 +323,102 @@ def _setup_docx_style(doc):
footer.is_linked_to_previous = False footer.is_linked_to_previous = False
p = footer.paragraphs[0] p = footer.paragraphs[0]
p.alignment = WD_ALIGN_PARAGRAPH.RIGHT p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
run = p.add_run()
run.font.name = "Courier"
run.font.size = Pt(10)
# Field: PAGE # PAGE field
fldChar1 = OxmlElement("w:fldChar") run_page = p.add_run()
fldChar1.set(qn("w:fldCharType"), "begin") run_page.font.name = "Courier"
run._r.addprevious(fldChar1) run_page.font.size = Pt(10)
instrText = OxmlElement("w:instrText") fldCharBegin = OxmlElement("w:fldChar")
instrText.set(qn("xml:space"), "preserve") fldCharBegin.set(qn("w:fldCharType"), "begin")
instrText.text = " PAGE " run_page._r.addprevious(fldCharBegin)
run._r.addprevious(instrText)
fldChar2 = OxmlElement("w:fldChar") instrTextPage = OxmlElement("w:instrText")
fldChar2.set(qn("w:fldCharType"), "end") instrTextPage.set(qn("xml:space"), "preserve")
run._r.addprevious(fldChar2) instrTextPage.text = "PAGE"
run_page._r.addprevious(instrTextPage)
# Static text: " of " fldCharEnd = OxmlElement("w:fldChar")
fldCharEnd.set(qn("w:fldCharType"), "end")
run_page._r.addprevious(fldCharEnd)
# " of " text
run_of = p.add_run(" of ") run_of = p.add_run(" of ")
run_of.font.name = "Courier" run_of.font.name = "Courier"
run_of.font.size = Pt(10) run_of.font.size = Pt(10)
# Field: NUMPAGES # NUMPAGES field
run2 = p.add_run() run_numpages = p.add_run()
run2.font.name = "Courier" run_numpages.font.name = "Courier"
run2.font.size = Pt(10) run_numpages.font.size = Pt(10)
fldChar3 = OxmlElement("w:fldChar") fldCharBegin2 = OxmlElement("w:fldChar")
fldChar3.set(qn("w:fldCharType"), "begin") fldCharBegin2.set(qn("w:fldCharType"), "begin")
run2._r.addprevious(fldChar3) run_numpages._r.addprevious(fldCharBegin2)
instrText2 = OxmlElement("w:instrText") instrTextNumpages = OxmlElement("w:instrText")
instrText2.set(qn("xml:space"), "preserve") instrTextNumpages.set(qn("xml:space"), "preserve")
instrText2.text = " NUMPAGES " instrTextNumpages.text = "NUMPAGES"
run2._r.addprevious(instrText2) run_numpages._r.addprevious(instrTextNumpages)
fldChar4 = OxmlElement("w:fldChar") fldCharEnd2 = OxmlElement("w:fldChar")
fldChar4.set(qn("w:fldCharType"), "end") fldCharEnd2.set(qn("w:fldCharType"), "end")
run2._r.addprevious(fldChar4) run_numpages._r.addprevious(fldCharEnd2)
def _add_cover_page(doc, doc_type, date, description): def _add_cover_page(doc, doc_type, date, description):
p_type = doc.add_paragraph() """
Add a cover page:
- Centered horizontally and vertically using a full-page table.
- Lines:
1) Document type
2) Date (e.g. "June 14, 2026")
3-5) Empty space
6) One-sentence description
- Then page break.
"""
# Create a full-page table to center content vertically and horizontally
table = doc.add_table(rows=1, cols=1)
table.autofit = False
cell = table.cell(0, 0)
# Make table span full page height (approx)
cell.width = Inches(6.5)
# Center content inside the cell
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Clear default paragraph
cell.paragraphs[0].clear()
# Line 1: Document type
p_type = cell.add_paragraph()
p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_type = p_type.add_run(doc_type) run_type = p_type.add_run(doc_type)
run_type.bold = True run_type.bold = True
run_type.font.name = "Courier" run_type.font.name = "Courier"
run_type.font.size = Pt(12) run_type.font.size = Pt(12)
p_date = doc.add_paragraph() # Line 2: Date
p_date = cell.add_paragraph()
p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_date = p_date.add_run(date) run_date = p_date.add_run(date)
run_date.font.name = "Courier" run_date.font.name = "Courier"
run_date.font.size = Pt(12) run_date.font.size = Pt(12)
# Lines 3-5: blank space
for _ in range(3): for _ in range(3):
doc.add_paragraph() cell.add_paragraph()
p_desc = doc.add_paragraph() # Line 6: Description
p_desc = cell.add_paragraph()
p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_desc = p_desc.add_run(description) run_desc = p_desc.add_run(description)
run_desc.font.name = "Courier" run_desc.font.name = "Courier"
run_desc.font.size = Pt(12) run_desc.font.size = Pt(12)
# Page break after cover page
doc.add_page_break() doc.add_page_break()
@@ -425,6 +466,7 @@ def _add_summary_content(doc, text):
if not stripped: if not stripped:
continue continue
# Detect markdown-style headings: #, ##, ###, #### at start of line
m = re.match(r"^(#{1,4})\s+(.*)", stripped) m = re.match(r"^(#{1,4})\s+(.*)", stripped)
if m: if m:
heading_count += 1 heading_count += 1
@@ -437,6 +479,7 @@ def _add_summary_content(doc, text):
run.font.name = "Courier" run.font.name = "Courier"
run.font.size = Pt(12) run.font.size = Pt(12)
# Apply formatting based on this heading's ordinal position
if heading_count == 1: if heading_count == 1:
run.bold = True run.bold = True
elif heading_count == 2: elif heading_count == 2:
@@ -447,6 +490,7 @@ def _add_summary_content(doc, text):
run.italic = True run.italic = True
run.underline = True run.underline = True
else: else:
# Normal text line
p = doc.add_paragraph(stripped) p = doc.add_paragraph(stripped)
p.paragraph_format.space_after = Pt(4) p.paragraph_format.space_after = Pt(4)
@@ -462,11 +506,12 @@ def create_transcript_docx(
Create a .docx transcript with: Create a .docx transcript with:
- 1.5" left margin, 1" right margin - 1.5" left margin, 1" right margin
- 12pt Courier - 12pt Courier
- Continuous line numbering on the left - Continuous line numbering on the left (for transcript content only)
- Optional cover page with type, date, and AI-generated description. - Optional cover page with type, date, and AI-generated description.
""" """
doc = Document() doc = Document()
_setup_docx_style(doc) # Enable line numbering for transcript
_setup_docx_style(doc, enable_line_numbering=True)
if include_cover and cover_date and cover_desc: if include_cover and cover_date and cover_desc:
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc) _add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
@@ -485,9 +530,11 @@ def create_summary_docx(
""" """
Create a .docx summary with consistent font and heading styles. Create a .docx summary with consistent font and heading styles.
Optional cover page with type, date, and AI-generated description. Optional cover page with type, date, and AI-generated description.
No line numbering.
""" """
doc = Document() doc = Document()
_setup_docx_style(doc) # No line numbering for summary
_setup_docx_style(doc, enable_line_numbering=False)
if include_cover and cover_date and cover_desc: if include_cover and cover_date and cover_desc:
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc) _add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
@@ -507,25 +554,44 @@ def create_combined_docx(
): ):
""" """
Create a combined .docx with: Create a combined .docx with:
1) Transcript cover page 1) Transcript cover page (no line numbering)
2) Page break 2) Page break
3) Summary content 3) Summary content (no line numbering)
4) Page break 4) Page break
5) Transcript content 5) Transcript content (line numbering enabled)
""" """
doc = Document() doc = Document()
_setup_docx_style(doc) # Start with no line numbering (for cover and summary)
_setup_docx_style(doc, enable_line_numbering=False)
# 1) Transcript cover page (includes trailing page break) # 1) Transcript cover page (includes trailing page break)
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc) _add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
# 3) Summary content # 3) Summary content (no line numbering)
_add_summary_content(doc, summary_text) _add_summary_content(doc, summary_text)
# 4) Page break before transcript # 4) Page break before transcript
doc.add_page_break() doc.add_page_break()
# 5) Transcript content # Enable line numbering for transcript section
# We create a new section for transcript so line numbering applies only there
section_transcript = doc.add_section()
# Apply same margins
section_transcript.left_margin = Inches(1.5)
section_transcript.right_margin = Inches(1.0)
section_transcript.top_margin = Inches(1.0)
section_transcript.bottom_margin = Inches(1.0)
# Enable line numbering in transcript section
sectPr = section_transcript._sectPr
lnNumType = sectPr.find(qn("w:lnNumType"))
if lnNumType is None:
lnNumType = OxmlElement("w:lnNumType")
sectPr.append(lnNumType)
lnNumType.set(qn("w:start"), "continuous")
lnNumType.set(qn("w:countBy"), "1")
# 5) Transcript content (with line numbering)
_add_transcript_content(doc, transcript_text) _add_transcript_content(doc, transcript_text)
doc.save(filename) doc.save(filename)
+2 -1
View File
@@ -418,7 +418,8 @@ def process_transcription_task(
# 3c) Generate short cover-page descriptions using summarizer # 3c) Generate short cover-page descriptions using summarizer
transcript_cover_desc = "" transcript_cover_desc = ""
summary_cover_desc = "" summary_cover_desc = ""
today_str = datetime.utcnow().strftime("%Y-%m-%d") dt = datetime.utcnow()
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
try: try:
scraibe._ensure_summarizer() scraibe._ensure_summarizer()