Refactor PDF generation and attachment logic
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

- Generate PDFs by:
  - Creating individual .docx components (cover, transcript, summary)
  - Converting each .docx to PDF
  - Merging PDFs in correct order
  - Adding page numbers to final PDFs

- Transcribe & Summarize:
  - Attach: JSON, transcript MD, summary MD, TRANSCRIPT.pdf, SUMMARY.pdf, COMBINED.pdf

- Transcribe only:
  - Attach: JSON, transcript MD, TRANSCRIPT.pdf

- Ensure transcript line numbering is isolated to the transcript's own .docx before the PDFs are merged
This commit is contained in:
admin
2026-06-15 03:16:53 +00:00
parent 7ece1a50c2
commit 237bd4b37c
2 changed files with 81 additions and 67 deletions
+12 -2
View File
@@ -546,12 +546,16 @@ def process_transcription_task(
temp_files.append(docx_combined_path)
# 4b) Generate PDF documents
# Always use transcript text; use summary_text only if include_summary is True
pdf_summary_text = summary_text if include_summary else ""
pdf_files = {}
try:
pdf_output_dir = tempfile.mkdtemp(prefix="pdf_output_")
pdf_files = generate_pdf_documents(
transcript_text=transcript_text,
summary_text=summary_text if include_summary else "",
summary_text=pdf_summary_text,
output_dir=pdf_output_dir,
transcript_cover_date=today_str,
transcript_cover_desc=transcript_cover_desc,
@@ -565,17 +569,23 @@ def process_transcription_task(
logger.warning("PDF generation failed (continuing with DOCX attachments): %s", e)
# 5) Build attachments list
# Base attachments: JSON, transcript MD, transcript DOCX
attachments = [
md_transcript_path,
docx_transcript_path,
json_path,
]
# If summary is present, add the summary MD and summary DOCX, plus the combined DOCX when one exists
if summary_text:
attachments += [md_summary_path, docx_summary_path]
if docx_combined_path:
attachments.append(docx_combined_path)
# Add PDFs to attachments
# Add PDFs based on mode:
# - Always: TRANSCRIPT.pdf
# - If summary: SUMMARY.pdf and COMBINED.pdf
if pdf_files.get("transcript_pdf"):
attachments.append(pdf_files["transcript_pdf"])
if pdf_files.get("summary_pdf"):