From e0d2fd6963b93668bd670a1a59aee85f8aa0f893 Mon Sep 17 00:00:00 2001
From: admin
Date: Sun, 14 Jun 2026 22:07:36 +0000
Subject: [PATCH] Fix combined .docx: line numbering only for transcript,
 centered cover pages, correct date format, reliable page numbering

- _setup_docx_style() takes enable_line_numbering (default False) and only
  adds w:lnNumType to the first section when it is set.
- The PAGE and NUMPAGES field characters are appended inside their runs.
- _add_cover_page() places the cover text in a one-cell table with
  center-aligned paragraphs.
- create_combined_docx() puts the transcript in its own section and enables
  line numbering on that section only.
- The cover-page date is formatted as "Month D, YYYY".
---
Review notes (not part of the commit message):
- PAGE/NUMPAGES: the w:fldChar and w:instrText elements are now appended
  inside their runs (run._r.append) instead of being inserted with
  addprevious(), which placed them next to the run, directly under w:p.
- Line numbering now sets w:restart="continuous"; w:start is the numeric
  starting value and does not take "continuous".
- create_combined_docx(): removed doc.add_page_break() in front of
  doc.add_section(). The new section starts on a new page by default, so the
  extra break would leave an empty page before the transcript.
- _add_cover_page(): the docstring and comments no longer claim vertical
  centering; nothing in the function sets a row height or a cell vertical
  alignment.
- NOTE(review): blank context lines and indentation were reconstructed from
  the hunk line counts. Verify against the target files (or apply with
  --ignore-whitespace / reduced context) before merging. The post-image hash
  on the first "index" line predates the review changes.
- NOTE(review): datetime.utcnow() is deprecated as of Python 3.12; consider a
  timezone-aware replacement as a follow-up.

 scraibe/email_sender.py | 168 ++++++++++++++++++++++++++++------------
 scraibe/tasks.py        |   3 +-
 2 files changed, 118 insertions(+), 53 deletions(-)

diff --git a/scraibe/email_sender.py b/scraibe/email_sender.py
index 349a03c..b1c7dfe 100644
--- a/scraibe/email_sender.py
+++ b/scraibe/email_sender.py
@@ -291,21 +291,28 @@ def send_email(
         raise EmailError(f"Failed to send email: {e}")
 
 
-def _setup_docx_style(doc):
+def _setup_docx_style(doc, enable_line_numbering=False):
+    """
+    Base document setup (margins, font, footer).
+    Line numbering is optional and applied to the first section only.
+    """
     section = doc.sections[0]
     section.left_margin = Inches(1.5)
     section.right_margin = Inches(1.0)
     section.top_margin = Inches(1.0)
     section.bottom_margin = Inches(1.0)
 
-    sectPr = section._sectPr
-    lnNumType = sectPr.find(qn("w:lnNumType"))
-    if lnNumType is None:
-        lnNumType = OxmlElement("w:lnNumType")
-        sectPr.append(lnNumType)
-    lnNumType.set(qn("w:start"), "continuous")
-    lnNumType.set(qn("w:countBy"), "1")
+    # Line numbering (only for transcript sections)
+    if enable_line_numbering:
+        sectPr = section._sectPr
+        lnNumType = sectPr.find(qn("w:lnNumType"))
+        if lnNumType is None:
+            lnNumType = OxmlElement("w:lnNumType")
+            sectPr.append(lnNumType)
+        lnNumType.set(qn("w:restart"), "continuous")
+        lnNumType.set(qn("w:countBy"), "1")
 
+    # Default font
     style = doc.styles["Normal"]
     font = style.font
     font.name = "Courier"
@@ -316,68 +323,102 @@ def _setup_docx_style(doc):
     footer.is_linked_to_previous = False
     p = footer.paragraphs[0]
     p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
-    run = p.add_run()
-    run.font.name = "Courier"
-    run.font.size = Pt(10)
 
-    # Field: PAGE
-    fldChar1 = OxmlElement("w:fldChar")
-    fldChar1.set(qn("w:fldCharType"), "begin")
-    run._r.addprevious(fldChar1)
+    # PAGE field
+    run_page = p.add_run()
+    run_page.font.name = "Courier"
+    run_page.font.size = Pt(10)
 
-    instrText = OxmlElement("w:instrText")
-    instrText.set(qn("xml:space"), "preserve")
-    instrText.text = " PAGE "
-    run._r.addprevious(instrText)
+    fldCharBegin = OxmlElement("w:fldChar")
+    fldCharBegin.set(qn("w:fldCharType"), "begin")
+    run_page._r.append(fldCharBegin)
 
-    fldChar2 = OxmlElement("w:fldChar")
-    fldChar2.set(qn("w:fldCharType"), "end")
-    run._r.addprevious(fldChar2)
+    instrTextPage = OxmlElement("w:instrText")
+    instrTextPage.set(qn("xml:space"), "preserve")
+    instrTextPage.text = "PAGE"
+    run_page._r.append(instrTextPage)
 
-    # Static text: " of "
+    fldCharEnd = OxmlElement("w:fldChar")
+    fldCharEnd.set(qn("w:fldCharType"), "end")
+    run_page._r.append(fldCharEnd)
+
+    # " of " text
     run_of = p.add_run(" of ")
     run_of.font.name = "Courier"
     run_of.font.size = Pt(10)
 
-    # Field: NUMPAGES
-    run2 = p.add_run()
-    run2.font.name = "Courier"
-    run2.font.size = Pt(10)
+    # NUMPAGES field
+    run_numpages = p.add_run()
+    run_numpages.font.name = "Courier"
+    run_numpages.font.size = Pt(10)
 
-    fldChar3 = OxmlElement("w:fldChar")
-    fldChar3.set(qn("w:fldCharType"), "begin")
-    run2._r.addprevious(fldChar3)
+    fldCharBegin2 = OxmlElement("w:fldChar")
+    fldCharBegin2.set(qn("w:fldCharType"), "begin")
+    run_numpages._r.append(fldCharBegin2)
 
-    instrText2 = OxmlElement("w:instrText")
-    instrText2.set(qn("xml:space"), "preserve")
-    instrText2.text = " NUMPAGES "
-    run2._r.addprevious(instrText2)
+    instrTextNumpages = OxmlElement("w:instrText")
+    instrTextNumpages.set(qn("xml:space"), "preserve")
+    instrTextNumpages.text = "NUMPAGES"
+    run_numpages._r.append(instrTextNumpages)
 
-    fldChar4 = OxmlElement("w:fldChar")
-    fldChar4.set(qn("w:fldCharType"), "end")
-    run2._r.addprevious(fldChar4)
+    fldCharEnd2 = OxmlElement("w:fldChar")
+    fldCharEnd2.set(qn("w:fldCharType"), "end")
+    run_numpages._r.append(fldCharEnd2)
 
 
 def _add_cover_page(doc, doc_type, date, description):
-    p_type = doc.add_paragraph()
+    """
+    Add a cover page:
+    - Horizontally centered inside a one-cell table (no vertical centering yet).
+    - Lines:
+      1) Document type
+      2) Date (e.g. "June 14, 2026")
+      3-5) Empty space
+      6) One-sentence description
+    - Then page break.
+    """
+    # Single-cell table that holds the cover text
+    table = doc.add_table(rows=1, cols=1)
+    table.autofit = False
+    cell = table.cell(0, 0)
+
+    # Set the cell width (this does not change the table height)
+    cell.width = Inches(6.5)
+
+    # Center content inside the cell
+    for paragraph in cell.paragraphs:
+        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
+
+    # Clear default paragraph
+    cell.paragraphs[0].clear()
+
+    # Line 1: Document type
+    p_type = cell.add_paragraph()
+    p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
     run_type = p_type.add_run(doc_type)
     run_type.bold = True
     run_type.font.name = "Courier"
     run_type.font.size = Pt(12)
 
-    p_date = doc.add_paragraph()
+    # Line 2: Date
+    p_date = cell.add_paragraph()
+    p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
     run_date = p_date.add_run(date)
     run_date.font.name = "Courier"
     run_date.font.size = Pt(12)
 
+    # Lines 3-5: blank space
     for _ in range(3):
-        doc.add_paragraph()
+        cell.add_paragraph()
 
-    p_desc = doc.add_paragraph()
+    # Line 6: Description
+    p_desc = cell.add_paragraph()
+    p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
     run_desc = p_desc.add_run(description)
     run_desc.font.name = "Courier"
     run_desc.font.size = Pt(12)
 
+    # Page break after cover page
     doc.add_page_break()
 
 
@@ -425,6 +466,7 @@ def _add_summary_content(doc, text):
         if not stripped:
             continue
 
+        # Detect markdown-style headings: #, ##, ###, #### at start of line
         m = re.match(r"^(#{1,4})\s+(.*)", stripped)
         if m:
             heading_count += 1
@@ -437,6 +479,7 @@ def _add_summary_content(doc, text):
             run.font.name = "Courier"
             run.font.size = Pt(12)
 
+            # Apply formatting based on this heading's ordinal position
             if heading_count == 1:
                 run.bold = True
             elif heading_count == 2:
@@ -447,6 +490,7 @@ def _add_summary_content(doc, text):
                 run.italic = True
                 run.underline = True
         else:
+            # Normal text line
             p = doc.add_paragraph(stripped)
             p.paragraph_format.space_after = Pt(4)
 
@@ -462,11 +506,12 @@ def create_transcript_docx(
     Create a .docx transcript with:
     - 1.5" left margin, 1" right margin
     - 12pt Courier
-    - Continuous line numbering on the left
+    - Continuous line numbering on the left (for transcript content only)
     - Optional cover page with type, date, and AI-generated description.
     """
     doc = Document()
-    _setup_docx_style(doc)
+    # Enable line numbering for transcript
+    _setup_docx_style(doc, enable_line_numbering=True)
     if include_cover and cover_date and cover_desc:
         _add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
 
@@ -485,9 +530,11 @@ def create_summary_docx(
     """
     Create a .docx summary with consistent font and heading styles.
     Optional cover page with type, date, and AI-generated description.
+    No line numbering.
     """
     doc = Document()
-    _setup_docx_style(doc)
+    # No line numbering for summary
+    _setup_docx_style(doc, enable_line_numbering=False)
     if include_cover and cover_date and cover_desc:
         _add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
 
@@ -507,25 +554,42 @@ def create_combined_docx(
 ):
     """
     Create a combined .docx with:
-    1) Transcript cover page
+    1) Transcript cover page (no line numbering)
     2) Page break
-    3) Summary content
+    3) Summary content (no line numbering)
     4) Page break
-    5) Transcript content
+    5) Transcript content (line numbering enabled)
     """
     doc = Document()
-    _setup_docx_style(doc)
+    # Start with no line numbering (for cover and summary)
+    _setup_docx_style(doc, enable_line_numbering=False)
 
     # 1) Transcript cover page (includes trailing page break)
     _add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
 
-    # 3) Summary content
+    # 3) Summary content (no line numbering)
     _add_summary_content(doc, summary_text)
 
-    # 4) Page break before transcript
-    doc.add_page_break()
+    # 4) Page break before transcript: add_section() starts the new section on
+    # a new page by default, so no separate doc.add_page_break() is needed (it
+    # would leave an empty page between the summary and the transcript).
+    section_transcript = doc.add_section()
+    # Apply same margins
+    section_transcript.left_margin = Inches(1.5)
+    section_transcript.right_margin = Inches(1.0)
+    section_transcript.top_margin = Inches(1.0)
+    section_transcript.bottom_margin = Inches(1.0)
 
-    # 5) Transcript content
+    # Enable line numbering in the transcript section only
+    sectPr = section_transcript._sectPr
+    lnNumType = sectPr.find(qn("w:lnNumType"))
+    if lnNumType is None:
+        lnNumType = OxmlElement("w:lnNumType")
+        sectPr.append(lnNumType)
+    lnNumType.set(qn("w:restart"), "continuous")
+    lnNumType.set(qn("w:countBy"), "1")
+
+    # 5) Transcript content (with line numbering)
     _add_transcript_content(doc, transcript_text)
     doc.save(filename)
 
diff --git a/scraibe/tasks.py b/scraibe/tasks.py
index a7ca0b6..a965141 100644
--- a/scraibe/tasks.py
+++ b/scraibe/tasks.py
@@ -418,7 +418,8 @@ def process_transcription_task(
         # 3c) Generate short cover-page descriptions using summarizer
         transcript_cover_desc = ""
         summary_cover_desc = ""
-        today_str = datetime.utcnow().strftime("%Y-%m-%d")
+        dt = datetime.utcnow()
+        today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
 
         try:
             scraibe._ensure_summarizer()