Fix combined .docx: line numbering only for transcript, centered cover pages, correct date format, reliable page numbering
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 22:07:36 +00:00
parent 4651c5f8b2
commit e0d2fd6963
2 changed files with 118 additions and 51 deletions
+109 -43
View File
@@ -291,13 +291,19 @@ def send_email(
raise EmailError(f"Failed to send email: {e}")
def _setup_docx_style(doc):
def _setup_docx_style(doc, enable_line_numbering=False):
"""
Base document setup (margins, font, footer).
Line numbering is optional and applied to the first section only.
"""
section = doc.sections[0]
section.left_margin = Inches(1.5)
section.right_margin = Inches(1.0)
section.top_margin = Inches(1.0)
section.bottom_margin = Inches(1.0)
# Line numbering (only for transcript sections)
if enable_line_numbering:
sectPr = section._sectPr
lnNumType = sectPr.find(qn("w:lnNumType"))
if lnNumType is None:
@@ -306,6 +312,7 @@ def _setup_docx_style(doc):
lnNumType.set(qn("w:start"), "continuous")
lnNumType.set(qn("w:countBy"), "1")
# Default font
style = doc.styles["Normal"]
font = style.font
font.name = "Courier"
@@ -316,68 +323,102 @@ def _setup_docx_style(doc):
footer.is_linked_to_previous = False
p = footer.paragraphs[0]
p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
run = p.add_run()
run.font.name = "Courier"
run.font.size = Pt(10)
# Field: PAGE
fldChar1 = OxmlElement("w:fldChar")
fldChar1.set(qn("w:fldCharType"), "begin")
run._r.addprevious(fldChar1)
# PAGE field
run_page = p.add_run()
run_page.font.name = "Courier"
run_page.font.size = Pt(10)
instrText = OxmlElement("w:instrText")
instrText.set(qn("xml:space"), "preserve")
instrText.text = " PAGE "
run._r.addprevious(instrText)
fldCharBegin = OxmlElement("w:fldChar")
fldCharBegin.set(qn("w:fldCharType"), "begin")
run_page._r.addprevious(fldCharBegin)
fldChar2 = OxmlElement("w:fldChar")
fldChar2.set(qn("w:fldCharType"), "end")
run._r.addprevious(fldChar2)
instrTextPage = OxmlElement("w:instrText")
instrTextPage.set(qn("xml:space"), "preserve")
instrTextPage.text = "PAGE"
run_page._r.addprevious(instrTextPage)
# Static text: " of "
fldCharEnd = OxmlElement("w:fldChar")
fldCharEnd.set(qn("w:fldCharType"), "end")
run_page._r.addprevious(fldCharEnd)
# " of " text
run_of = p.add_run(" of ")
run_of.font.name = "Courier"
run_of.font.size = Pt(10)
# Field: NUMPAGES
run2 = p.add_run()
run2.font.name = "Courier"
run2.font.size = Pt(10)
# NUMPAGES field
run_numpages = p.add_run()
run_numpages.font.name = "Courier"
run_numpages.font.size = Pt(10)
fldChar3 = OxmlElement("w:fldChar")
fldChar3.set(qn("w:fldCharType"), "begin")
run2._r.addprevious(fldChar3)
fldCharBegin2 = OxmlElement("w:fldChar")
fldCharBegin2.set(qn("w:fldCharType"), "begin")
run_numpages._r.addprevious(fldCharBegin2)
instrText2 = OxmlElement("w:instrText")
instrText2.set(qn("xml:space"), "preserve")
instrText2.text = " NUMPAGES "
run2._r.addprevious(instrText2)
instrTextNumpages = OxmlElement("w:instrText")
instrTextNumpages.set(qn("xml:space"), "preserve")
instrTextNumpages.text = "NUMPAGES"
run_numpages._r.addprevious(instrTextNumpages)
fldChar4 = OxmlElement("w:fldChar")
fldChar4.set(qn("w:fldCharType"), "end")
run2._r.addprevious(fldChar4)
fldCharEnd2 = OxmlElement("w:fldChar")
fldCharEnd2.set(qn("w:fldCharType"), "end")
run_numpages._r.addprevious(fldCharEnd2)
def _add_cover_page(doc, doc_type, date, description):
p_type = doc.add_paragraph()
"""
Add a cover page:
- Centered horizontally inside a single-cell table (the code shown sets no row height or vertical alignment, so vertical centering is not applied).
- Lines:
1) Document type
2) Date (e.g. "June 14, 2026")
3-5) Empty space
6) One-sentence description
- Then page break.
"""
# Create a single-cell table to hold the cover content (its paragraphs are centered horizontally below)
table = doc.add_table(rows=1, cols=1)
table.autofit = False
cell = table.cell(0, 0)
# Fix the cell width (this sets width only; nothing here controls the table's height)
cell.width = Inches(6.5)
# Center content inside the cell
for paragraph in cell.paragraphs:
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Clear default paragraph
cell.paragraphs[0].clear()
# Line 1: Document type
p_type = cell.add_paragraph()
p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_type = p_type.add_run(doc_type)
run_type.bold = True
run_type.font.name = "Courier"
run_type.font.size = Pt(12)
p_date = doc.add_paragraph()
# Line 2: Date
p_date = cell.add_paragraph()
p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_date = p_date.add_run(date)
run_date.font.name = "Courier"
run_date.font.size = Pt(12)
# Lines 3-5: blank space
for _ in range(3):
doc.add_paragraph()
cell.add_paragraph()
p_desc = doc.add_paragraph()
# Line 6: Description
p_desc = cell.add_paragraph()
p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
run_desc = p_desc.add_run(description)
run_desc.font.name = "Courier"
run_desc.font.size = Pt(12)
# Page break after cover page
doc.add_page_break()
@@ -425,6 +466,7 @@ def _add_summary_content(doc, text):
if not stripped:
continue
# Detect markdown-style headings: #, ##, ###, #### at start of line
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
if m:
heading_count += 1
@@ -437,6 +479,7 @@ def _add_summary_content(doc, text):
run.font.name = "Courier"
run.font.size = Pt(12)
# Apply formatting based on this heading's ordinal position
if heading_count == 1:
run.bold = True
elif heading_count == 2:
@@ -447,6 +490,7 @@ def _add_summary_content(doc, text):
run.italic = True
run.underline = True
else:
# Normal text line
p = doc.add_paragraph(stripped)
p.paragraph_format.space_after = Pt(4)
@@ -462,11 +506,12 @@ def create_transcript_docx(
Create a .docx transcript with:
- 1.5" left margin, 1" right margin
- 12pt Courier
- Continuous line numbering on the left
- Continuous line numbering on the left (for transcript content only)
- Optional cover page with type, date, and AI-generated description.
"""
doc = Document()
_setup_docx_style(doc)
# Enable line numbering for transcript
_setup_docx_style(doc, enable_line_numbering=True)
if include_cover and cover_date and cover_desc:
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
@@ -485,9 +530,11 @@ def create_summary_docx(
"""
Create a .docx summary with consistent font and heading styles.
Optional cover page with type, date, and AI-generated description.
No line numbering.
"""
doc = Document()
_setup_docx_style(doc)
# No line numbering for summary
_setup_docx_style(doc, enable_line_numbering=False)
if include_cover and cover_date and cover_desc:
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
@@ -507,25 +554,44 @@ def create_combined_docx(
):
"""
Create a combined .docx with:
1) Transcript cover page
1) Transcript cover page (no line numbering)
2) Page break
3) Summary content
3) Summary content (no line numbering)
4) Page break
5) Transcript content
5) Transcript content (line numbering enabled)
"""
doc = Document()
_setup_docx_style(doc)
# Start with no line numbering (for cover and summary)
_setup_docx_style(doc, enable_line_numbering=False)
# 1) Transcript cover page (includes trailing page break)
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
# 3) Summary content
# 3) Summary content (no line numbering)
_add_summary_content(doc, summary_text)
# 4) Page break before transcript
doc.add_page_break()
# 5) Transcript content
# Enable line numbering for transcript section
# We create a new section for transcript so line numbering applies only there
section_transcript = doc.add_section()
# Apply same margins
section_transcript.left_margin = Inches(1.5)
section_transcript.right_margin = Inches(1.0)
section_transcript.top_margin = Inches(1.0)
section_transcript.bottom_margin = Inches(1.0)
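# Enable line numbering in transcript section
# NOTE(review): in WordprocessingML "continuous" is a value of w:restart, while w:start expects a number — confirm the attribute name set below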
sectPr = section_transcript._sectPr
lnNumType = sectPr.find(qn("w:lnNumType"))
if lnNumType is None:
lnNumType = OxmlElement("w:lnNumType")
sectPr.append(lnNumType)
lnNumType.set(qn("w:start"), "continuous")
lnNumType.set(qn("w:countBy"), "1")
# 5) Transcript content (with line numbering)
_add_transcript_content(doc, transcript_text)
doc.save(filename)
+2 -1
View File
@@ -418,7 +418,8 @@ def process_transcription_task(
# 3c) Generate short cover-page descriptions using summarizer
transcript_cover_desc = ""
summary_cover_desc = ""
today_str = datetime.utcnow().strftime("%Y-%m-%d")
dt = datetime.utcnow()
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
try:
scraibe._ensure_summarizer()