Fix combined .docx: line numbering only for transcript, centered cover pages, correct date format, reliable page numbering
This commit is contained in:
+116
-50
@@ -291,21 +291,28 @@ def send_email(
|
|||||||
raise EmailError(f"Failed to send email: {e}")
|
raise EmailError(f"Failed to send email: {e}")
|
||||||
|
|
||||||
|
|
||||||
def _setup_docx_style(doc):
|
def _setup_docx_style(doc, enable_line_numbering=False):
|
||||||
|
"""
|
||||||
|
Base document setup (margins, font, footer).
|
||||||
|
Line numbering is optional and applied to the first section only.
|
||||||
|
"""
|
||||||
section = doc.sections[0]
|
section = doc.sections[0]
|
||||||
section.left_margin = Inches(1.5)
|
section.left_margin = Inches(1.5)
|
||||||
section.right_margin = Inches(1.0)
|
section.right_margin = Inches(1.0)
|
||||||
section.top_margin = Inches(1.0)
|
section.top_margin = Inches(1.0)
|
||||||
section.bottom_margin = Inches(1.0)
|
section.bottom_margin = Inches(1.0)
|
||||||
|
|
||||||
sectPr = section._sectPr
|
# Line numbering (only for transcript sections)
|
||||||
lnNumType = sectPr.find(qn("w:lnNumType"))
|
if enable_line_numbering:
|
||||||
if lnNumType is None:
|
sectPr = section._sectPr
|
||||||
lnNumType = OxmlElement("w:lnNumType")
|
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||||
sectPr.append(lnNumType)
|
if lnNumType is None:
|
||||||
lnNumType.set(qn("w:start"), "continuous")
|
lnNumType = OxmlElement("w:lnNumType")
|
||||||
lnNumType.set(qn("w:countBy"), "1")
|
sectPr.append(lnNumType)
|
||||||
|
lnNumType.set(qn("w:start"), "continuous")
|
||||||
|
lnNumType.set(qn("w:countBy"), "1")
|
||||||
|
|
||||||
|
# Default font
|
||||||
style = doc.styles["Normal"]
|
style = doc.styles["Normal"]
|
||||||
font = style.font
|
font = style.font
|
||||||
font.name = "Courier"
|
font.name = "Courier"
|
||||||
@@ -316,68 +323,102 @@ def _setup_docx_style(doc):
|
|||||||
footer.is_linked_to_previous = False
|
footer.is_linked_to_previous = False
|
||||||
p = footer.paragraphs[0]
|
p = footer.paragraphs[0]
|
||||||
p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
|
||||||
run = p.add_run()
|
|
||||||
run.font.name = "Courier"
|
|
||||||
run.font.size = Pt(10)
|
|
||||||
|
|
||||||
# Field: PAGE
|
# PAGE field
|
||||||
fldChar1 = OxmlElement("w:fldChar")
|
run_page = p.add_run()
|
||||||
fldChar1.set(qn("w:fldCharType"), "begin")
|
run_page.font.name = "Courier"
|
||||||
run._r.addprevious(fldChar1)
|
run_page.font.size = Pt(10)
|
||||||
|
|
||||||
instrText = OxmlElement("w:instrText")
|
fldCharBegin = OxmlElement("w:fldChar")
|
||||||
instrText.set(qn("xml:space"), "preserve")
|
fldCharBegin.set(qn("w:fldCharType"), "begin")
|
||||||
instrText.text = " PAGE "
|
run_page._r.addprevious(fldCharBegin)
|
||||||
run._r.addprevious(instrText)
|
|
||||||
|
|
||||||
fldChar2 = OxmlElement("w:fldChar")
|
instrTextPage = OxmlElement("w:instrText")
|
||||||
fldChar2.set(qn("w:fldCharType"), "end")
|
instrTextPage.set(qn("xml:space"), "preserve")
|
||||||
run._r.addprevious(fldChar2)
|
instrTextPage.text = "PAGE"
|
||||||
|
run_page._r.addprevious(instrTextPage)
|
||||||
|
|
||||||
# Static text: " of "
|
fldCharEnd = OxmlElement("w:fldChar")
|
||||||
|
fldCharEnd.set(qn("w:fldCharType"), "end")
|
||||||
|
run_page._r.addprevious(fldCharEnd)
|
||||||
|
|
||||||
|
# " of " text
|
||||||
run_of = p.add_run(" of ")
|
run_of = p.add_run(" of ")
|
||||||
run_of.font.name = "Courier"
|
run_of.font.name = "Courier"
|
||||||
run_of.font.size = Pt(10)
|
run_of.font.size = Pt(10)
|
||||||
|
|
||||||
# Field: NUMPAGES
|
# NUMPAGES field
|
||||||
run2 = p.add_run()
|
run_numpages = p.add_run()
|
||||||
run2.font.name = "Courier"
|
run_numpages.font.name = "Courier"
|
||||||
run2.font.size = Pt(10)
|
run_numpages.font.size = Pt(10)
|
||||||
|
|
||||||
fldChar3 = OxmlElement("w:fldChar")
|
fldCharBegin2 = OxmlElement("w:fldChar")
|
||||||
fldChar3.set(qn("w:fldCharType"), "begin")
|
fldCharBegin2.set(qn("w:fldCharType"), "begin")
|
||||||
run2._r.addprevious(fldChar3)
|
run_numpages._r.addprevious(fldCharBegin2)
|
||||||
|
|
||||||
instrText2 = OxmlElement("w:instrText")
|
instrTextNumpages = OxmlElement("w:instrText")
|
||||||
instrText2.set(qn("xml:space"), "preserve")
|
instrTextNumpages.set(qn("xml:space"), "preserve")
|
||||||
instrText2.text = " NUMPAGES "
|
instrTextNumpages.text = "NUMPAGES"
|
||||||
run2._r.addprevious(instrText2)
|
run_numpages._r.addprevious(instrTextNumpages)
|
||||||
|
|
||||||
fldChar4 = OxmlElement("w:fldChar")
|
fldCharEnd2 = OxmlElement("w:fldChar")
|
||||||
fldChar4.set(qn("w:fldCharType"), "end")
|
fldCharEnd2.set(qn("w:fldCharType"), "end")
|
||||||
run2._r.addprevious(fldChar4)
|
run_numpages._r.addprevious(fldCharEnd2)
|
||||||
|
|
||||||
|
|
||||||
def _add_cover_page(doc, doc_type, date, description):
|
def _add_cover_page(doc, doc_type, date, description):
|
||||||
p_type = doc.add_paragraph()
|
"""
|
||||||
|
Add a cover page:
|
||||||
|
- Centered horizontally inside a single-cell table (the code below sets no vertical alignment or row height, so vertical centering is not actually applied).
|
||||||
|
- Lines:
|
||||||
|
1) Document type
|
||||||
|
2) Date (e.g. "June 14, 2026")
|
||||||
|
3-5) Empty space
|
||||||
|
6) One-sentence description
|
||||||
|
- Then page break.
|
||||||
|
"""
|
||||||
|
# Create a full-page table to center content vertically and horizontally
|
||||||
|
table = doc.add_table(rows=1, cols=1)
|
||||||
|
table.autofit = False
|
||||||
|
cell = table.cell(0, 0)
|
||||||
|
|
||||||
|
# Set the cell width (this affects width only; nothing here makes the table span the page height)
|
||||||
|
cell.width = Inches(6.5)
|
||||||
|
|
||||||
|
# Center content inside the cell
|
||||||
|
for paragraph in cell.paragraphs:
|
||||||
|
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
|
|
||||||
|
# Empty the default paragraph's content (the paragraph itself remains as a blank first line in the cell)
|
||||||
|
cell.paragraphs[0].clear()
|
||||||
|
|
||||||
|
# Line 1: Document type
|
||||||
|
p_type = cell.add_paragraph()
|
||||||
|
p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
run_type = p_type.add_run(doc_type)
|
run_type = p_type.add_run(doc_type)
|
||||||
run_type.bold = True
|
run_type.bold = True
|
||||||
run_type.font.name = "Courier"
|
run_type.font.name = "Courier"
|
||||||
run_type.font.size = Pt(12)
|
run_type.font.size = Pt(12)
|
||||||
|
|
||||||
p_date = doc.add_paragraph()
|
# Line 2: Date
|
||||||
|
p_date = cell.add_paragraph()
|
||||||
|
p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
run_date = p_date.add_run(date)
|
run_date = p_date.add_run(date)
|
||||||
run_date.font.name = "Courier"
|
run_date.font.name = "Courier"
|
||||||
run_date.font.size = Pt(12)
|
run_date.font.size = Pt(12)
|
||||||
|
|
||||||
|
# Lines 3-5: blank space
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
doc.add_paragraph()
|
cell.add_paragraph()
|
||||||
|
|
||||||
p_desc = doc.add_paragraph()
|
# Line 6: Description
|
||||||
|
p_desc = cell.add_paragraph()
|
||||||
|
p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
run_desc = p_desc.add_run(description)
|
run_desc = p_desc.add_run(description)
|
||||||
run_desc.font.name = "Courier"
|
run_desc.font.name = "Courier"
|
||||||
run_desc.font.size = Pt(12)
|
run_desc.font.size = Pt(12)
|
||||||
|
|
||||||
|
# Page break after cover page
|
||||||
doc.add_page_break()
|
doc.add_page_break()
|
||||||
|
|
||||||
|
|
||||||
@@ -425,6 +466,7 @@ def _add_summary_content(doc, text):
|
|||||||
if not stripped:
|
if not stripped:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Detect markdown-style headings: #, ##, ###, #### at start of line
|
||||||
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
m = re.match(r"^(#{1,4})\s+(.*)", stripped)
|
||||||
if m:
|
if m:
|
||||||
heading_count += 1
|
heading_count += 1
|
||||||
@@ -437,6 +479,7 @@ def _add_summary_content(doc, text):
|
|||||||
run.font.name = "Courier"
|
run.font.name = "Courier"
|
||||||
run.font.size = Pt(12)
|
run.font.size = Pt(12)
|
||||||
|
|
||||||
|
# Apply formatting based on this heading's ordinal position
|
||||||
if heading_count == 1:
|
if heading_count == 1:
|
||||||
run.bold = True
|
run.bold = True
|
||||||
elif heading_count == 2:
|
elif heading_count == 2:
|
||||||
@@ -447,6 +490,7 @@ def _add_summary_content(doc, text):
|
|||||||
run.italic = True
|
run.italic = True
|
||||||
run.underline = True
|
run.underline = True
|
||||||
else:
|
else:
|
||||||
|
# Normal text line
|
||||||
p = doc.add_paragraph(stripped)
|
p = doc.add_paragraph(stripped)
|
||||||
p.paragraph_format.space_after = Pt(4)
|
p.paragraph_format.space_after = Pt(4)
|
||||||
|
|
||||||
@@ -462,11 +506,12 @@ def create_transcript_docx(
|
|||||||
Create a .docx transcript with:
|
Create a .docx transcript with:
|
||||||
- 1.5" left margin, 1" right margin
|
- 1.5" left margin, 1" right margin
|
||||||
- 12pt Courier
|
- 12pt Courier
|
||||||
- Continuous line numbering on the left
|
- Continuous line numbering on the left (for transcript content only)
|
||||||
- Optional cover page with type, date, and AI-generated description.
|
- Optional cover page with type, date, and AI-generated description.
|
||||||
"""
|
"""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
_setup_docx_style(doc)
|
# Enable line numbering for transcript
|
||||||
|
_setup_docx_style(doc, enable_line_numbering=True)
|
||||||
|
|
||||||
if include_cover and cover_date and cover_desc:
|
if include_cover and cover_date and cover_desc:
|
||||||
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
|
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
|
||||||
@@ -485,9 +530,11 @@ def create_summary_docx(
|
|||||||
"""
|
"""
|
||||||
Create a .docx summary with consistent font and heading styles.
|
Create a .docx summary with consistent font and heading styles.
|
||||||
Optional cover page with type, date, and AI-generated description.
|
Optional cover page with type, date, and AI-generated description.
|
||||||
|
No line numbering.
|
||||||
"""
|
"""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
_setup_docx_style(doc)
|
# No line numbering for summary
|
||||||
|
_setup_docx_style(doc, enable_line_numbering=False)
|
||||||
|
|
||||||
if include_cover and cover_date and cover_desc:
|
if include_cover and cover_date and cover_desc:
|
||||||
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
|
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
|
||||||
@@ -507,25 +554,44 @@ def create_combined_docx(
|
|||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a combined .docx with:
|
Create a combined .docx with:
|
||||||
1) Transcript cover page
|
1) Transcript cover page (no line numbering)
|
||||||
2) Page break
|
2) Page break
|
||||||
3) Summary content
|
3) Summary content (no line numbering)
|
||||||
4) Page break
|
4) Page break
|
||||||
5) Transcript content
|
5) Transcript content (line numbering enabled)
|
||||||
"""
|
"""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
_setup_docx_style(doc)
|
# Start with no line numbering (for cover and summary)
|
||||||
|
_setup_docx_style(doc, enable_line_numbering=False)
|
||||||
|
|
||||||
# 1) Transcript cover page (includes trailing page break)
|
# 1) Transcript cover page (includes trailing page break)
|
||||||
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
|
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
|
||||||
|
|
||||||
# 3) Summary content
|
# 3) Summary content (no line numbering)
|
||||||
_add_summary_content(doc, summary_text)
|
_add_summary_content(doc, summary_text)
|
||||||
|
|
||||||
# 4) Page break before transcript
|
# 4) Page break before transcript
|
||||||
doc.add_page_break()
|
doc.add_page_break()
|
||||||
|
|
||||||
# 5) Transcript content
|
# Enable line numbering for transcript section
|
||||||
|
# We create a new section for transcript so line numbering applies only there
|
||||||
|
section_transcript = doc.add_section()
|
||||||
|
# Apply same margins
|
||||||
|
section_transcript.left_margin = Inches(1.5)
|
||||||
|
section_transcript.right_margin = Inches(1.0)
|
||||||
|
section_transcript.top_margin = Inches(1.0)
|
||||||
|
section_transcript.bottom_margin = Inches(1.0)
|
||||||
|
|
||||||
|
# Enable line numbering in transcript section
|
||||||
|
sectPr = section_transcript._sectPr
|
||||||
|
lnNumType = sectPr.find(qn("w:lnNumType"))
|
||||||
|
if lnNumType is None:
|
||||||
|
lnNumType = OxmlElement("w:lnNumType")
|
||||||
|
sectPr.append(lnNumType)
|
||||||
|
lnNumType.set(qn("w:start"), "continuous")
|
||||||
|
lnNumType.set(qn("w:countBy"), "1")
|
||||||
|
|
||||||
|
# 5) Transcript content (with line numbering)
|
||||||
_add_transcript_content(doc, transcript_text)
|
_add_transcript_content(doc, transcript_text)
|
||||||
|
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
|
|||||||
+2
-1
@@ -418,7 +418,8 @@ def process_transcription_task(
|
|||||||
# 3c) Generate short cover-page descriptions using summarizer
|
# 3c) Generate short cover-page descriptions using summarizer
|
||||||
transcript_cover_desc = ""
|
transcript_cover_desc = ""
|
||||||
summary_cover_desc = ""
|
summary_cover_desc = ""
|
||||||
today_str = datetime.utcnow().strftime("%Y-%m-%d")
|
dt = datetime.utcnow()
|
||||||
|
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
scraibe._ensure_summarizer()
|
scraibe._ensure_summarizer()
|
||||||
|
|||||||
Reference in New Issue
Block a user