Add PDF-based document generation with LibreOffice; fix line numbering and margins
- Add LibreOffice Writer and DejaVu fonts to Dockerfile for PDF generation
- Add PyPDF2 and reportlab to requirements.txt
- Refactor email_sender.py:
  - Enforce 1-inch margins on all sides
  - Isolate line numbering to transcript section only
  - Add generate_pdf_documents() to build:
    - TRANSCRIPT.pdf (cover + transcript)
    - SUMMARY.pdf (cover + summary)
    - COMBINED.pdf (transcript cover + summary + TRANSCRIPT header + transcript)
  - Add page numbers (bottom-right) to all PDFs via reportlab
- Update tasks.py:
  - Use generate_pdf_documents() after creating DOCX files
  - Attach source JSON, MD files, and compiled PDFs in success email
- Add test_docx_generation.py for transcript/summary/combined DOCX testing
This commit is contained in:
+31
-3
@@ -14,7 +14,7 @@ from .summarizer import SummarizerClient, SummarizerError
|
||||
from .misc import setup_logging
|
||||
from .email_sender import send_email, EmailError, load_template
|
||||
from .email_sender import create_transcript_docx, create_summary_docx
|
||||
from .email_sender import create_combined_docx
|
||||
from .email_sender import create_combined_docx, generate_pdf_documents
|
||||
|
||||
logger = logging.getLogger("scraibe.tasks")
|
||||
|
||||
@@ -545,6 +545,26 @@ def process_transcription_task(
|
||||
)
|
||||
temp_files.append(docx_combined_path)
|
||||
|
||||
# 4b) Generate PDF documents
|
||||
pdf_files = {}
|
||||
try:
|
||||
pdf_output_dir = tempfile.mkdtemp(prefix="pdf_output_")
|
||||
pdf_files = generate_pdf_documents(
|
||||
transcript_text=transcript_text,
|
||||
summary_text=summary_text if include_summary else "",
|
||||
output_dir=pdf_output_dir,
|
||||
transcript_cover_date=today_str,
|
||||
transcript_cover_desc=transcript_cover_desc,
|
||||
summary_cover_date=today_str,
|
||||
summary_cover_desc=summary_cover_desc,
|
||||
)
|
||||
for pdf_path in pdf_files.values():
|
||||
if pdf_path and os.path.exists(pdf_path):
|
||||
temp_files.append(pdf_path)
|
||||
except Exception as e:
|
||||
logger.warning("PDF generation failed (continuing with DOCX attachments): %s", e)
|
||||
|
||||
# 5) Build attachments list
|
||||
attachments = [
|
||||
md_transcript_path,
|
||||
docx_transcript_path,
|
||||
@@ -555,7 +575,15 @@ def process_transcription_task(
|
||||
if docx_combined_path:
|
||||
attachments.append(docx_combined_path)
|
||||
|
||||
# 5) Send success email
|
||||
# Add PDFs to attachments
|
||||
if pdf_files.get("transcript_pdf"):
|
||||
attachments.append(pdf_files["transcript_pdf"])
|
||||
if pdf_files.get("summary_pdf"):
|
||||
attachments.append(pdf_files["summary_pdf"])
|
||||
if pdf_files.get("combined_pdf"):
|
||||
attachments.append(pdf_files["combined_pdf"])
|
||||
|
||||
# 6) Send success email
|
||||
send_success_email(
|
||||
to=email_to,
|
||||
transcript_text=transcript_text,
|
||||
@@ -575,7 +603,7 @@ def process_transcription_task(
|
||||
)
|
||||
raise e
|
||||
finally:
|
||||
# 6) Cleanup
|
||||
# 7) Cleanup
|
||||
for path in temp_files:
|
||||
_remove_file(path)
|
||||
if audio_path:
|
||||
|
||||
Reference in New Issue
Block a user