Fix combined .docx: line numbering only for transcript, centered cover pages, correct date format, reliable page numbering
This commit is contained in:
+109
-43
@@ -291,13 +291,19 @@ def send_email(
|
||||
raise EmailError(f"Failed to send email: {e}")
|
||||
|
||||
|
||||
def _setup_docx_style(doc):
|
||||
def _setup_docx_style(doc, enable_line_numbering=False):
|
||||
"""
|
||||
Base document setup (margins, font, footer).
|
||||
Line numbering is optional and applied to the first section only.
|
||||
"""
|
||||
section = doc.sections[0]
|
||||
section.left_margin = Inches(1.5)
|
||||
section.right_margin = Inches(1.0)
|
||||
section.top_margin = Inches(1.0)
|
||||
section.bottom_margin = Inches(1.0)
|
||||
|
||||
# Line numbering (only for transcript sections)
|
||||
if enable_line_numbering:
|
||||
sectPr = section._sectPr
|
||||
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||
if lnNumType is None:
|
||||
@@ -306,6 +312,7 @@ def _setup_docx_style(doc):
|
||||
lnNumType.set(qn("w:start"), "continuous")
|
||||
lnNumType.set(qn("w:countBy"), "1")
|
||||
|
||||
# Default font
|
||||
style = doc.styles["Normal"]
|
||||
font = style.font
|
||||
font.name = "Courier"
|
||||
@@ -316,68 +323,102 @@ def _setup_docx_style(doc):
|
||||
footer.is_linked_to_previous = False
|
||||
p = footer.paragraphs[0]
|
||||
p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||
run = p.add_run()
|
||||
run.font.name = "Courier"
|
||||
run.font.size = Pt(10)
|
||||
|
||||
# Field: PAGE
|
||||
fldChar1 = OxmlElement("w:fldChar")
|
||||
fldChar1.set(qn("w:fldCharType"), "begin")
|
||||
run._r.addprevious(fldChar1)
|
||||
# PAGE field
|
||||
run_page = p.add_run()
|
||||
run_page.font.name = "Courier"
|
||||
run_page.font.size = Pt(10)
|
||||
|
||||
instrText = OxmlElement("w:instrText")
|
||||
instrText.set(qn("xml:space"), "preserve")
|
||||
instrText.text = " PAGE "
|
||||
run._r.addprevious(instrText)
|
||||
fldCharBegin = OxmlElement("w:fldChar")
|
||||
fldCharBegin.set(qn("w:fldCharType"), "begin")
|
||||
run_page._r.addprevious(fldCharBegin)
|
||||
|
||||
fldChar2 = OxmlElement("w:fldChar")
|
||||
fldChar2.set(qn("w:fldCharType"), "end")
|
||||
run._r.addprevious(fldChar2)
|
||||
instrTextPage = OxmlElement("w:instrText")
|
||||
instrTextPage.set(qn("xml:space"), "preserve")
|
||||
instrTextPage.text = "PAGE"
|
||||
run_page._r.addprevious(instrTextPage)
|
||||
|
||||
# Static text: " of "
|
||||
fldCharEnd = OxmlElement("w:fldChar")
|
||||
fldCharEnd.set(qn("w:fldCharType"), "end")
|
||||
run_page._r.addprevious(fldCharEnd)
|
||||
|
||||
# " of " text
|
||||
run_of = p.add_run(" of ")
|
||||
run_of.font.name = "Courier"
|
||||
run_of.font.size = Pt(10)
|
||||
|
||||
# Field: NUMPAGES
|
||||
run2 = p.add_run()
|
||||
run2.font.name = "Courier"
|
||||
run2.font.size = Pt(10)
|
||||
# NUMPAGES field
|
||||
run_numpages = p.add_run()
|
||||
run_numpages.font.name = "Courier"
|
||||
run_numpages.font.size = Pt(10)
|
||||
|
||||
fldChar3 = OxmlElement("w:fldChar")
|
||||
fldChar3.set(qn("w:fldCharType"), "begin")
|
||||
run2._r.addprevious(fldChar3)
|
||||
fldCharBegin2 = OxmlElement("w:fldChar")
|
||||
fldCharBegin2.set(qn("w:fldCharType"), "begin")
|
||||
run_numpages._r.addprevious(fldCharBegin2)
|
||||
|
||||
instrText2 = OxmlElement("w:instrText")
|
||||
instrText2.set(qn("xml:space"), "preserve")
|
||||
instrText2.text = " NUMPAGES "
|
||||
run2._r.addprevious(instrText2)
|
||||
instrTextNumpages = OxmlElement("w:instrText")
|
||||
instrTextNumpages.set(qn("xml:space"), "preserve")
|
||||
instrTextNumpages.text = "NUMPAGES"
|
||||
run_numpages._r.addprevious(instrTextNumpages)
|
||||
|
||||
fldChar4 = OxmlElement("w:fldChar")
|
||||
fldChar4.set(qn("w:fldCharType"), "end")
|
||||
run2._r.addprevious(fldChar4)
|
||||
fldCharEnd2 = OxmlElement("w:fldChar")
|
||||
fldCharEnd2.set(qn("w:fldCharType"), "end")
|
||||
run_numpages._r.addprevious(fldCharEnd2)
|
||||
|
||||
|
||||
def _add_cover_page(doc, doc_type, date, description):
|
||||
p_type = doc.add_paragraph()
|
||||
"""
|
||||
Add a cover page:
|
||||
- Centered horizontally inside a single-cell table (no vertical centering or row height is applied here).
|
||||
- Lines:
|
||||
1) Document type
|
||||
2) Date (e.g. "June 14, 2026")
|
||||
3-5) Empty space
|
||||
6) One-sentence description
|
||||
- Then page break.
|
||||
"""
|
||||
# Create a single-cell table to hold the horizontally centered cover content
|
||||
table = doc.add_table(rows=1, cols=1)
|
||||
table.autofit = False
|
||||
cell = table.cell(0, 0)
|
||||
|
||||
# Set the cell width (this controls width only; it does not set the table's height)
|
||||
cell.width = Inches(6.5)
|
||||
|
||||
# Center content inside the cell
|
||||
for paragraph in cell.paragraphs:
|
||||
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
|
||||
# Clear default paragraph
|
||||
cell.paragraphs[0].clear()
|
||||
|
||||
# Line 1: Document type
|
||||
p_type = cell.add_paragraph()
|
||||
p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run_type = p_type.add_run(doc_type)
|
||||
run_type.bold = True
|
||||
run_type.font.name = "Courier"
|
||||
run_type.font.size = Pt(12)
|
||||
|
||||
p_date = doc.add_paragraph()
|
||||
# Line 2: Date
|
||||
p_date = cell.add_paragraph()
|
||||
p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run_date = p_date.add_run(date)
|
||||
run_date.font.name = "Courier"
|
||||
run_date.font.size = Pt(12)
|
||||
|
||||
# Lines 3-5: blank space
|
||||
for _ in range(3):
|
||||
doc.add_paragraph()
|
||||
cell.add_paragraph()
|
||||
|
||||
p_desc = doc.add_paragraph()
|
||||
# Line 6: Description
|
||||
p_desc = cell.add_paragraph()
|
||||
p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||
run_desc = p_desc.add_run(description)
|
||||
run_desc.font.name = "Courier"
|
||||
run_desc.font.size = Pt(12)
|
||||
|
||||
# Page break after cover page
|
||||
doc.add_page_break()
|
||||
|
||||
|
||||
@@ -425,6 +466,7 @@ def _add_summary_content(doc, text):
|
||||
if not stripped:
|
||||
continue
|
||||
|
||||
# Detect markdown-style headings: #, ##, ###, #### at start of line
|
||||
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
||||
if m:
|
||||
heading_count += 1
|
||||
@@ -437,6 +479,7 @@ def _add_summary_content(doc, text):
|
||||
run.font.name = "Courier"
|
||||
run.font.size = Pt(12)
|
||||
|
||||
# Apply formatting based on this heading's ordinal position
|
||||
if heading_count == 1:
|
||||
run.bold = True
|
||||
elif heading_count == 2:
|
||||
@@ -447,6 +490,7 @@ def _add_summary_content(doc, text):
|
||||
run.italic = True
|
||||
run.underline = True
|
||||
else:
|
||||
# Normal text line
|
||||
p = doc.add_paragraph(stripped)
|
||||
p.paragraph_format.space_after = Pt(4)
|
||||
|
||||
@@ -462,11 +506,12 @@ def create_transcript_docx(
|
||||
Create a .docx transcript with:
|
||||
- 1.5" left margin, 1" right margin
|
||||
- 12pt Courier
|
||||
- Continuous line numbering on the left
|
||||
- Continuous line numbering on the left (for transcript content only)
|
||||
- Optional cover page with type, date, and AI-generated description.
|
||||
"""
|
||||
doc = Document()
|
||||
_setup_docx_style(doc)
|
||||
# Enable line numbering for transcript
|
||||
_setup_docx_style(doc, enable_line_numbering=True)
|
||||
|
||||
if include_cover and cover_date and cover_desc:
|
||||
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
|
||||
@@ -485,9 +530,11 @@ def create_summary_docx(
|
||||
"""
|
||||
Create a .docx summary with consistent font and heading styles.
|
||||
Optional cover page with type, date, and AI-generated description.
|
||||
No line numbering.
|
||||
"""
|
||||
doc = Document()
|
||||
_setup_docx_style(doc)
|
||||
# No line numbering for summary
|
||||
_setup_docx_style(doc, enable_line_numbering=False)
|
||||
|
||||
if include_cover and cover_date and cover_desc:
|
||||
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
|
||||
@@ -507,25 +554,44 @@ def create_combined_docx(
|
||||
):
|
||||
"""
|
||||
Create a combined .docx with:
|
||||
1) Transcript cover page
|
||||
1) Transcript cover page (no line numbering)
|
||||
2) Page break
|
||||
3) Summary content
|
||||
3) Summary content (no line numbering)
|
||||
4) Page break
|
||||
5) Transcript content
|
||||
5) Transcript content (line numbering enabled)
|
||||
"""
|
||||
doc = Document()
|
||||
_setup_docx_style(doc)
|
||||
# Start with no line numbering (for cover and summary)
|
||||
_setup_docx_style(doc, enable_line_numbering=False)
|
||||
|
||||
# 1) Transcript cover page (includes trailing page break)
|
||||
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
|
||||
|
||||
# 3) Summary content
|
||||
# 3) Summary content (no line numbering)
|
||||
_add_summary_content(doc, summary_text)
|
||||
|
||||
# 4) Page break before transcript
|
||||
doc.add_page_break()
|
||||
|
||||
# 5) Transcript content
|
||||
# Enable line numbering for transcript section
|
||||
# We create a new section for transcript so line numbering applies only there
|
||||
section_transcript = doc.add_section()
|
||||
# Apply same margins
|
||||
section_transcript.left_margin = Inches(1.5)
|
||||
section_transcript.right_margin = Inches(1.0)
|
||||
section_transcript.top_margin = Inches(1.0)
|
||||
section_transcript.bottom_margin = Inches(1.0)
|
||||
|
||||
# Enable line numbering in transcript section
|
||||
sectPr = section_transcript._sectPr
|
||||
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||
if lnNumType is None:
|
||||
lnNumType = OxmlElement("w:lnNumType")
|
||||
sectPr.append(lnNumType)
|
||||
lnNumType.set(qn("w:start"), "continuous")
|
||||
lnNumType.set(qn("w:countBy"), "1")
|
||||
|
||||
# 5) Transcript content (with line numbering)
|
||||
_add_transcript_content(doc, transcript_text)
|
||||
|
||||
doc.save(filename)
|
||||
|
||||
+2
-1
@@ -418,7 +418,8 @@ def process_transcription_task(
|
||||
# 3c) Generate short cover-page descriptions using summarizer
|
||||
transcript_cover_desc = ""
|
||||
summary_cover_desc = ""
|
||||
today_str = datetime.utcnow().strftime("%Y-%m-%d")
|
||||
dt = datetime.utcnow()
|
||||
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
|
||||
|
||||
try:
|
||||
scraibe._ensure_summarizer()
|
||||
|
||||
Reference in New Issue
Block a user