Revert PDF generation; simplify to DOCX + MD + JSON only
- Remove PDF helpers, LibreOffice, PyPDF2, reportlab
- Transcript DOCX: standalone, no cover page, with line numbering
- Summary DOCX: standalone, no cover page, no line numbering
- Attachments:
  - Transcribe: JSON, transcript MD, transcript DOCX
  - Transcribe & Summarize: JSON, transcript MD, transcript DOCX, summary MD, summary DOCX
This commit is contained in:
+2
-7
@@ -9,14 +9,9 @@ LABEL description="Scraibe: LocalAI-backed transcription and diarization client
|
|||||||
Sends audio to a LocalAI server running vibevoice.cpp and uses a second LLM for summarization."
|
Sends audio to a LocalAI server running vibevoice.cpp and uses a second LLM for summarization."
|
||||||
LABEL url="https://git.optimex.systems/admin/scribe"
|
LABEL url="https://git.optimex.systems/admin/scribe"
|
||||||
|
|
||||||
# Install system dependencies (ffmpeg, redis, LibreOffice for PDF generation)
|
# Install system dependencies (ffmpeg, redis)
|
||||||
RUN apt update -y && \
|
RUN apt update -y && \
|
||||||
apt install -y --no-install-recommends \
|
apt install -y --no-install-recommends ffmpeg redis-server && \
|
||||||
ffmpeg \
|
|
||||||
redis-server \
|
|
||||||
libreoffice-writer \
|
|
||||||
fonts-dejavu-core \
|
|
||||||
&& \
|
|
||||||
apt clean && \
|
apt clean && \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||||
|
|
||||||
|
|||||||
@@ -6,5 +6,3 @@ PyYAML>=6.0
|
|||||||
celery[redis]>=5.3.0
|
celery[redis]>=5.3.0
|
||||||
redis>=5.0.0
|
redis>=5.0.0
|
||||||
python-docx>=1.1.0
|
python-docx>=1.1.0
|
||||||
PyPDF2>=3.0.0
|
|
||||||
reportlab>=4.0.0
|
|
||||||
|
|||||||
+7
-357
@@ -13,8 +13,6 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import smtplib
|
import smtplib
|
||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
from email import encoders
|
from email import encoders
|
||||||
from email.mime.base import MIMEBase
|
from email.mime.base import MIMEBase
|
||||||
from email.mime.multipart import MIMEMultipart
|
from email.mime.multipart import MIMEMultipart
|
||||||
@@ -336,58 +334,6 @@ def _enable_line_numbering(section, start_at=1, count_by=1, restart=True):
|
|||||||
sectPr.append(lnNumType)
|
sectPr.append(lnNumType)
|
||||||
|
|
||||||
|
|
||||||
def _add_cover_page(doc, doc_type, date, description):
|
|
||||||
"""
|
|
||||||
Add a cover page:
|
|
||||||
- Centered horizontally and vertically using a full-page table.
|
|
||||||
- Lines:
|
|
||||||
1) Document type
|
|
||||||
2) Date (e.g. "June 14, 2026")
|
|
||||||
3-5) Empty space
|
|
||||||
6) One-sentence description
|
|
||||||
"""
|
|
||||||
# Create a full-page table to center content vertically and horizontally
|
|
||||||
table = doc.add_table(rows=1, cols=1)
|
|
||||||
table.autofit = False
|
|
||||||
cell = table.cell(0, 0)
|
|
||||||
|
|
||||||
# Make table span full page width (approx)
|
|
||||||
cell.width = Inches(6.5)
|
|
||||||
|
|
||||||
# Center content inside the cell
|
|
||||||
for paragraph in cell.paragraphs:
|
|
||||||
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
|
|
||||||
# Clear default paragraph
|
|
||||||
cell.paragraphs[0].clear()
|
|
||||||
|
|
||||||
# Line 1: Document type
|
|
||||||
p_type = cell.add_paragraph()
|
|
||||||
p_type.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
run_type = p_type.add_run(doc_type)
|
|
||||||
run_type.bold = True
|
|
||||||
run_type.font.name = "Courier"
|
|
||||||
run_type.font.size = Pt(12)
|
|
||||||
|
|
||||||
# Line 2: Date
|
|
||||||
p_date = cell.add_paragraph()
|
|
||||||
p_date.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
run_date = p_date.add_run(date)
|
|
||||||
run_date.font.name = "Courier"
|
|
||||||
run_date.font.size = Pt(12)
|
|
||||||
|
|
||||||
# Lines 3-5: blank space
|
|
||||||
for _ in range(3):
|
|
||||||
cell.add_paragraph()
|
|
||||||
|
|
||||||
# Line 6: Description
|
|
||||||
p_desc = cell.add_paragraph()
|
|
||||||
p_desc.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
run_desc = p_desc.add_run(description)
|
|
||||||
run_desc.font.name = "Courier"
|
|
||||||
run_desc.font.size = Pt(12)
|
|
||||||
|
|
||||||
|
|
||||||
def _add_transcript_content(doc, text):
|
def _add_transcript_content(doc, text):
|
||||||
"""
|
"""
|
||||||
Add transcript lines to the document with formatting.
|
Add transcript lines to the document with formatting.
|
||||||
@@ -467,146 +413,27 @@ def _add_summary_content(doc, text):
|
|||||||
p.paragraph_format.space_after = Pt(4)
|
p.paragraph_format.space_after = Pt(4)
|
||||||
|
|
||||||
|
|
||||||
# ------------ PDF helpers ------------
|
# ------------ Public DOCX functions ------------
|
||||||
|
|
||||||
def _docx_to_pdf(docx_path: str, output_dir: str) -> str:
|
|
||||||
"""
|
|
||||||
Convert a .docx file to PDF using LibreOffice.
|
|
||||||
Returns the path of the generated PDF, or None on failure.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Use LibreOffice in headless mode
|
|
||||||
result = subprocess.run(
|
|
||||||
[
|
|
||||||
"libreoffice",
|
|
||||||
"--headless",
|
|
||||||
"--convert-to", "pdf",
|
|
||||||
"--outdir", output_dir,
|
|
||||||
docx_path,
|
|
||||||
],
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
timeout=120,
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode != 0:
|
|
||||||
logger.warning("LibreOffice conversion failed: %s", result.stderr)
|
|
||||||
|
|
||||||
# LibreOffice creates a PDF with the same base name
|
|
||||||
base = os.path.splitext(os.path.basename(docx_path))[0]
|
|
||||||
pdf_path = os.path.join(output_dir, f"{base}.pdf")
|
|
||||||
|
|
||||||
if os.path.exists(pdf_path):
|
|
||||||
return pdf_path
|
|
||||||
else:
|
|
||||||
logger.warning("Converted PDF not found at: %s", pdf_path)
|
|
||||||
return None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error converting DOCX to PDF: %s", e)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _merge_pdfs(input_pdfs: List[str], output_pdf: str) -> bool:
|
|
||||||
"""
|
|
||||||
Merge multiple PDF files into a single PDF using PyPDF2.
|
|
||||||
Returns True on success, False on failure.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from PyPDF2 import PdfMerger
|
|
||||||
|
|
||||||
merger = PdfMerger()
|
|
||||||
for pdf in input_pdfs:
|
|
||||||
if os.path.exists(pdf):
|
|
||||||
merger.append(pdf)
|
|
||||||
|
|
||||||
merger.write(output_pdf)
|
|
||||||
merger.close()
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error merging PDFs: %s", e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _add_page_numbers_to_pdf(input_pdf: str, output_pdf: str) -> bool:
|
|
||||||
"""
|
|
||||||
Add page numbers to a PDF using reportlab.
|
|
||||||
Page numbers appear at the bottom-right of each page.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from reportlab.pdfgen import canvas
|
|
||||||
from reportlab.lib.pagesizes import letter
|
|
||||||
from reportlab.pdfbase import pdfmetrics
|
|
||||||
from reportlab.pdfbase.ttfonts import TTFont
|
|
||||||
from PyPDF2 import PdfReader
|
|
||||||
|
|
||||||
# Try to use a standard font
|
|
||||||
font_name = "Courier"
|
|
||||||
try:
|
|
||||||
pdfmetrics.registerFont(
|
|
||||||
TTFont("Courier", "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf")
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
reader = PdfReader(input_pdf)
|
|
||||||
num_pages = len(reader.pages)
|
|
||||||
|
|
||||||
# Create a canvas to add page numbers
|
|
||||||
c = canvas.Canvas(output_pdf, pagesize=letter)
|
|
||||||
|
|
||||||
for page_num in range(num_pages):
|
|
||||||
page = reader.pages[page_num]
|
|
||||||
c = canvas.Canvas(output_pdf, pagesize=letter)
|
|
||||||
|
|
||||||
# Add page number
|
|
||||||
c.setFont(font_name, 10)
|
|
||||||
page_text = f"Page {page_num + 1} of {num_pages}"
|
|
||||||
text_width = c.stringWidth(page_text, font_name, 10)
|
|
||||||
x = letter[0] - 1 * 72 - text_width # 1 inch from right
|
|
||||||
y = 1 * 72 # 1 inch from bottom
|
|
||||||
c.drawString(x, y, page_text)
|
|
||||||
|
|
||||||
c.showPage()
|
|
||||||
c.save()
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error adding page numbers to PDF: %s", e)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
# ------------ Public DOCX/PDF functions ------------
|
|
||||||
|
|
||||||
def create_transcript_docx(
|
def create_transcript_docx(
|
||||||
text: str,
|
text: str,
|
||||||
filename: str,
|
filename: str,
|
||||||
include_cover: bool = False,
|
|
||||||
cover_date: str = "",
|
|
||||||
cover_desc: str = "",
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a .docx transcript with:
|
Create a .docx transcript with:
|
||||||
- 1" margins on all sides
|
- 1" margins on all sides
|
||||||
- 12pt Courier
|
- 12pt Courier
|
||||||
- Continuous line numbering on the left (for transcript content only)
|
- Continuous line numbering on the left
|
||||||
- Optional cover page with type, date, and AI-generated description.
|
- No cover page (standalone transcript only).
|
||||||
"""
|
"""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
_configure_base_font(doc)
|
_configure_base_font(doc)
|
||||||
_configure_section_margins(doc)
|
_configure_section_margins(doc)
|
||||||
|
|
||||||
# 1) Optional cover page (no line numbering)
|
# Enable line numbering for transcript section
|
||||||
if include_cover and cover_date and cover_desc:
|
|
||||||
_add_cover_page(doc, "TRANSCRIPT", cover_date, cover_desc)
|
|
||||||
doc.add_page_break()
|
|
||||||
|
|
||||||
# 2) Enable line numbering for transcript section
|
|
||||||
_enable_line_numbering(doc.sections[0])
|
_enable_line_numbering(doc.sections[0])
|
||||||
|
|
||||||
# 3) Transcript content (with line numbering)
|
# Transcript content (with line numbering)
|
||||||
_add_transcript_content(doc, text)
|
_add_transcript_content(doc, text)
|
||||||
|
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
@@ -615,198 +442,21 @@ def create_transcript_docx(
|
|||||||
def create_summary_docx(
|
def create_summary_docx(
|
||||||
text: str,
|
text: str,
|
||||||
filename: str,
|
filename: str,
|
||||||
include_cover: bool = False,
|
|
||||||
cover_date: str = "",
|
|
||||||
cover_desc: str = "",
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a .docx summary with consistent font and heading styles.
|
Create a .docx summary with consistent font and heading styles.
|
||||||
Optional cover page with type, date, and AI-generated description.
|
No cover page, no line numbering.
|
||||||
No line numbering.
|
|
||||||
"""
|
"""
|
||||||
doc = Document()
|
doc = Document()
|
||||||
_configure_base_font(doc)
|
_configure_base_font(doc)
|
||||||
_configure_section_margins(doc)
|
_configure_section_margins(doc)
|
||||||
|
|
||||||
# 1) Optional cover page
|
# Summary content (no line numbering)
|
||||||
if include_cover and cover_date and cover_desc:
|
|
||||||
_add_cover_page(doc, "SUMMARY", cover_date, cover_desc)
|
|
||||||
doc.add_page_break()
|
|
||||||
|
|
||||||
# 2) Summary content (no line numbering)
|
|
||||||
_add_summary_content(doc, text)
|
_add_summary_content(doc, text)
|
||||||
|
|
||||||
doc.save(filename)
|
doc.save(filename)
|
||||||
|
|
||||||
|
|
||||||
def create_combined_docx(
|
|
||||||
transcript_text: str,
|
|
||||||
summary_text: str,
|
|
||||||
filename: str,
|
|
||||||
transcript_cover_date: str,
|
|
||||||
transcript_cover_desc: str,
|
|
||||||
summary_cover_date: str,
|
|
||||||
summary_cover_desc: str,
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Create a combined .docx with:
|
|
||||||
1) Transcript cover page (no line numbering)
|
|
||||||
2) Page break
|
|
||||||
3) Summary content (no line numbering)
|
|
||||||
4) Page break
|
|
||||||
5) Transcript content (line numbering enabled only here)
|
|
||||||
|
|
||||||
Line numbering is restricted to the transcript section only.
|
|
||||||
"""
|
|
||||||
doc = Document()
|
|
||||||
_configure_base_font(doc)
|
|
||||||
_configure_section_margins(doc)
|
|
||||||
|
|
||||||
# 1) Transcript cover page (no line numbering)
|
|
||||||
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
|
|
||||||
doc.add_page_break()
|
|
||||||
|
|
||||||
# 3) Summary content (no line numbering)
|
|
||||||
_add_summary_content(doc, summary_text)
|
|
||||||
doc.add_page_break()
|
|
||||||
|
|
||||||
# 4) Enable line numbering for transcript section
|
|
||||||
_enable_line_numbering(doc.sections[0])
|
|
||||||
|
|
||||||
# 5) Transcript content (with line numbering)
|
|
||||||
_add_transcript_content(doc, transcript_text)
|
|
||||||
|
|
||||||
doc.save(filename)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_pdf_documents(
|
|
||||||
transcript_text: str,
|
|
||||||
summary_text: str,
|
|
||||||
output_dir: str,
|
|
||||||
transcript_cover_date: str,
|
|
||||||
transcript_cover_desc: str,
|
|
||||||
summary_cover_date: str,
|
|
||||||
summary_cover_desc: str,
|
|
||||||
) -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Generate PDF documents by:
|
|
||||||
- Creating individual .docx components (cover pages, transcript, summary)
|
|
||||||
- Converting each .docx to PDF
|
|
||||||
- Merging PDFs in the correct order
|
|
||||||
- Adding page numbers to the final PDFs
|
|
||||||
|
|
||||||
Behavior:
|
|
||||||
- Always:
|
|
||||||
- Generate TRANSCRIPT.pdf:
|
|
||||||
- transcript_cover.pdf + transcript_with_line_numbers.pdf
|
|
||||||
- If summary_text is provided:
|
|
||||||
- Generate SUMMARY.pdf:
|
|
||||||
- summary_cover.pdf + summary.pdf
|
|
||||||
- Generate COMBINED.pdf:
|
|
||||||
- transcript_cover.pdf + summary.pdf + transcript_with_line_numbers.pdf
|
|
||||||
|
|
||||||
Returns a dict with paths:
|
|
||||||
- transcript_pdf (always)
|
|
||||||
- summary_pdf (if summary_text provided)
|
|
||||||
- combined_pdf (if summary_text provided)
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
|
||||||
|
|
||||||
result = {}
|
|
||||||
|
|
||||||
# Create temporary directory for intermediate files
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
|
||||||
# 1) Generate constituent DOCX files
|
|
||||||
|
|
||||||
# Transcript cover
|
|
||||||
transcript_cover_docx = os.path.join(tmpdir, "TRANSCRIPT_COVER.docx")
|
|
||||||
doc = Document()
|
|
||||||
_configure_base_font(doc)
|
|
||||||
_configure_section_margins(doc)
|
|
||||||
_add_cover_page(doc, "TRANSCRIPT", transcript_cover_date, transcript_cover_desc)
|
|
||||||
doc.save(transcript_cover_docx)
|
|
||||||
|
|
||||||
# Summary cover (only if summary is requested)
|
|
||||||
summary_cover_docx = None
|
|
||||||
if summary_text:
|
|
||||||
summary_cover_docx = os.path.join(tmpdir, "SUMMARY_COVER.docx")
|
|
||||||
doc = Document()
|
|
||||||
_configure_base_font(doc)
|
|
||||||
_configure_section_margins(doc)
|
|
||||||
_add_cover_page(doc, "SUMMARY", summary_cover_date, summary_cover_desc)
|
|
||||||
doc.save(summary_cover_docx)
|
|
||||||
|
|
||||||
# Transcript (with line numbering)
|
|
||||||
transcript_docx = os.path.join(tmpdir, "TRANSCRIPT.docx")
|
|
||||||
doc = Document()
|
|
||||||
_configure_base_font(doc)
|
|
||||||
_configure_section_margins(doc)
|
|
||||||
_enable_line_numbering(doc.sections[0])
|
|
||||||
_add_transcript_content(doc, transcript_text)
|
|
||||||
doc.save(transcript_docx)
|
|
||||||
|
|
||||||
# Summary (no line numbering)
|
|
||||||
summary_docx = None
|
|
||||||
if summary_text:
|
|
||||||
summary_docx = os.path.join(tmpdir, "SUMMARY.docx")
|
|
||||||
doc = Document()
|
|
||||||
_configure_base_font(doc)
|
|
||||||
_configure_section_margins(doc)
|
|
||||||
_add_summary_content(doc, summary_text)
|
|
||||||
doc.save(summary_docx)
|
|
||||||
|
|
||||||
# 2) Convert DOCX to PDF
|
|
||||||
|
|
||||||
transcript_cover_pdf = _docx_to_pdf(transcript_cover_docx, tmpdir)
|
|
||||||
transcript_pdf = _docx_to_pdf(transcript_docx, tmpdir)
|
|
||||||
|
|
||||||
summary_cover_pdf = None
|
|
||||||
summary_pdf = None
|
|
||||||
if summary_text:
|
|
||||||
summary_cover_pdf = _docx_to_pdf(summary_cover_docx, tmpdir)
|
|
||||||
summary_pdf = _docx_to_pdf(summary_docx, tmpdir)
|
|
||||||
|
|
||||||
# 3) Assemble TRANSCRIPT.pdf: transcript_cover + transcript + page numbers
|
|
||||||
transcript_output_pdf = os.path.join(output_dir, "TRANSCRIPT.pdf")
|
|
||||||
merged_transcript = os.path.join(tmpdir, "TRANSCRIPT_MERGED.pdf")
|
|
||||||
if (
|
|
||||||
transcript_cover_pdf
|
|
||||||
and transcript_pdf
|
|
||||||
and _merge_pdfs([transcript_cover_pdf, transcript_pdf], merged_transcript)
|
|
||||||
and _add_page_numbers_to_pdf(merged_transcript, transcript_output_pdf)
|
|
||||||
):
|
|
||||||
result["transcript_pdf"] = transcript_output_pdf
|
|
||||||
|
|
||||||
# 4) If summary is provided:
|
|
||||||
# - SUMMARY.pdf: summary_cover + summary + page numbers
|
|
||||||
# - COMBINED.pdf: transcript_cover + summary + transcript + page numbers
|
|
||||||
if summary_text and summary_pdf and summary_cover_pdf:
|
|
||||||
# SUMMARY.pdf
|
|
||||||
summary_output_pdf = os.path.join(output_dir, "SUMMARY.pdf")
|
|
||||||
merged_summary = os.path.join(tmpdir, "SUMMARY_MERGED.pdf")
|
|
||||||
if (
|
|
||||||
_merge_pdfs([summary_cover_pdf, summary_pdf], merged_summary)
|
|
||||||
and _add_page_numbers_to_pdf(merged_summary, summary_output_pdf)
|
|
||||||
):
|
|
||||||
result["summary_pdf"] = summary_output_pdf
|
|
||||||
|
|
||||||
# COMBINED.pdf
|
|
||||||
combined_output_pdf = os.path.join(output_dir, "COMBINED.pdf")
|
|
||||||
merged_combined = os.path.join(tmpdir, "COMBINED_MERGED.pdf")
|
|
||||||
if (
|
|
||||||
_merge_pdfs(
|
|
||||||
[transcript_cover_pdf, summary_pdf, transcript_pdf],
|
|
||||||
merged_combined,
|
|
||||||
)
|
|
||||||
and _add_page_numbers_to_pdf(merged_combined, combined_output_pdf)
|
|
||||||
):
|
|
||||||
result["combined_pdf"] = combined_output_pdf
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def send_success_email(
|
def send_success_email(
|
||||||
to: str,
|
to: str,
|
||||||
transcript_text: str,
|
transcript_text: str,
|
||||||
|
|||||||
+4
-113
@@ -14,7 +14,6 @@ from .summarizer import SummarizerClient, SummarizerError
|
|||||||
from .misc import setup_logging
|
from .misc import setup_logging
|
||||||
from .email_sender import send_email, EmailError, load_template
|
from .email_sender import send_email, EmailError, load_template
|
||||||
from .email_sender import create_transcript_docx, create_summary_docx
|
from .email_sender import create_transcript_docx, create_summary_docx
|
||||||
from .email_sender import create_combined_docx, generate_pdf_documents
|
|
||||||
|
|
||||||
logger = logging.getLogger("scraibe.tasks")
|
logger = logging.getLogger("scraibe.tasks")
|
||||||
|
|
||||||
@@ -415,59 +414,6 @@ def process_transcription_task(
|
|||||||
)
|
)
|
||||||
speaker_map = {}
|
speaker_map = {}
|
||||||
|
|
||||||
# 3c) Generate short cover-page descriptions using summarizer
|
|
||||||
transcript_cover_desc = ""
|
|
||||||
summary_cover_desc = ""
|
|
||||||
dt = datetime.utcnow()
|
|
||||||
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
scraibe._ensure_summarizer()
|
|
||||||
summarizer = scraibe._summarizer
|
|
||||||
|
|
||||||
# Transcript description
|
|
||||||
prompt = (
|
|
||||||
"In one short sentence (max 25 words), describe the content of this transcript "
|
|
||||||
"for use on a cover page. Do not include speaker names. Output only the sentence.\n\n"
|
|
||||||
+ transcript_text
|
|
||||||
)
|
|
||||||
response = summarizer._chat_completion(
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
temperature=0.3,
|
|
||||||
max_tokens=60,
|
|
||||||
)
|
|
||||||
transcript_cover_desc = (
|
|
||||||
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
|
||||||
).strip()
|
|
||||||
if not transcript_cover_desc:
|
|
||||||
transcript_cover_desc = "Transcript of the recorded conversation."
|
|
||||||
logger.info("Transcript cover description: %s", transcript_cover_desc)
|
|
||||||
|
|
||||||
# Summary description (if summary requested)
|
|
||||||
if include_summary:
|
|
||||||
prompt = (
|
|
||||||
"In one short sentence (max 25 words), describe the content of this summary "
|
|
||||||
"for use on a cover page. Output only the sentence.\n\n"
|
|
||||||
+ summary_text
|
|
||||||
)
|
|
||||||
response = summarizer._chat_completion(
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
temperature=0.3,
|
|
||||||
max_tokens=60,
|
|
||||||
)
|
|
||||||
summary_cover_desc = (
|
|
||||||
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
|
||||||
).strip()
|
|
||||||
if not summary_cover_desc:
|
|
||||||
summary_cover_desc = "Summary of the recorded conversation."
|
|
||||||
logger.info("Summary cover description: %s", summary_cover_desc)
|
|
||||||
|
|
||||||
except (SummarizerError, Exception) as e:
|
|
||||||
logger.warning("Failed to generate cover descriptions: %s", e)
|
|
||||||
transcript_cover_desc = transcript_cover_desc or "Transcript of the recorded conversation."
|
|
||||||
if include_summary:
|
|
||||||
summary_cover_desc = summary_cover_desc or "Summary of the recorded conversation."
|
|
||||||
|
|
||||||
# 4) Prepare files
|
# 4) Prepare files
|
||||||
|
|
||||||
# Transcript .md
|
# Transcript .md
|
||||||
@@ -477,14 +423,11 @@ def process_transcription_task(
|
|||||||
f.write(transcript_text)
|
f.write(transcript_text)
|
||||||
temp_files.append(md_transcript_path)
|
temp_files.append(md_transcript_path)
|
||||||
|
|
||||||
# Transcript .docx (standalone, with cover page)
|
# Transcript .docx (standalone, no cover page)
|
||||||
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
||||||
create_transcript_docx(
|
create_transcript_docx(
|
||||||
transcript_text,
|
transcript_text,
|
||||||
docx_transcript_path,
|
docx_transcript_path,
|
||||||
include_cover=True,
|
|
||||||
cover_date=today_str,
|
|
||||||
cover_desc=transcript_cover_desc,
|
|
||||||
)
|
)
|
||||||
temp_files.append(docx_transcript_path)
|
temp_files.append(docx_transcript_path)
|
||||||
|
|
||||||
@@ -511,7 +454,6 @@ def process_transcription_task(
|
|||||||
# Summary files (if present)
|
# Summary files (if present)
|
||||||
md_summary_path = None
|
md_summary_path = None
|
||||||
docx_summary_path = None
|
docx_summary_path = None
|
||||||
docx_combined_path = None
|
|
||||||
|
|
||||||
if summary_text:
|
if summary_text:
|
||||||
# Summary .md
|
# Summary .md
|
||||||
@@ -521,77 +463,26 @@ def process_transcription_task(
|
|||||||
f.write(summary_text)
|
f.write(summary_text)
|
||||||
temp_files.append(md_summary_path)
|
temp_files.append(md_summary_path)
|
||||||
|
|
||||||
# Summary .docx (standalone, with cover page)
|
# Summary .docx (standalone, no cover page)
|
||||||
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
||||||
create_summary_docx(
|
create_summary_docx(
|
||||||
summary_text,
|
summary_text,
|
||||||
docx_summary_path,
|
docx_summary_path,
|
||||||
include_cover=True,
|
|
||||||
cover_date=today_str,
|
|
||||||
cover_desc=summary_cover_desc,
|
|
||||||
)
|
)
|
||||||
temp_files.append(docx_summary_path)
|
temp_files.append(docx_summary_path)
|
||||||
|
|
||||||
# Combined .docx: Transcript + Summary with cover pages
|
|
||||||
docx_combined_path = _safe_filename("TRANSCRIPT_AND_SUMMARY", local, date_tag, ".docx")
|
|
||||||
create_combined_docx(
|
|
||||||
transcript_text=transcript_text,
|
|
||||||
summary_text=summary_text,
|
|
||||||
filename=docx_combined_path,
|
|
||||||
transcript_cover_date=today_str,
|
|
||||||
transcript_cover_desc=transcript_cover_desc,
|
|
||||||
summary_cover_date=today_str,
|
|
||||||
summary_cover_desc=summary_cover_desc,
|
|
||||||
)
|
|
||||||
temp_files.append(docx_combined_path)
|
|
||||||
|
|
||||||
# 4b) Generate PDF documents
|
|
||||||
|
|
||||||
# Always use transcript text; use summary_text only if include_summary is True
|
|
||||||
pdf_summary_text = summary_text if include_summary else ""
|
|
||||||
|
|
||||||
pdf_files = {}
|
|
||||||
try:
|
|
||||||
pdf_output_dir = tempfile.mkdtemp(prefix="pdf_output_")
|
|
||||||
pdf_files = generate_pdf_documents(
|
|
||||||
transcript_text=transcript_text,
|
|
||||||
summary_text=pdf_summary_text,
|
|
||||||
output_dir=pdf_output_dir,
|
|
||||||
transcript_cover_date=today_str,
|
|
||||||
transcript_cover_desc=transcript_cover_desc,
|
|
||||||
summary_cover_date=today_str,
|
|
||||||
summary_cover_desc=summary_cover_desc,
|
|
||||||
)
|
|
||||||
for pdf_path in pdf_files.values():
|
|
||||||
if pdf_path and os.path.exists(pdf_path):
|
|
||||||
temp_files.append(pdf_path)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning("PDF generation failed (continuing with DOCX attachments): %s", e)
|
|
||||||
|
|
||||||
# 5) Build attachments list
|
# 5) Build attachments list
|
||||||
|
|
||||||
# Base attachments: JSON, transcript MD, transcript DOCX
|
# Always: JSON, transcript MD, transcript DOCX
|
||||||
attachments = [
|
attachments = [
|
||||||
md_transcript_path,
|
md_transcript_path,
|
||||||
docx_transcript_path,
|
docx_transcript_path,
|
||||||
json_path,
|
json_path,
|
||||||
]
|
]
|
||||||
|
|
||||||
# If summary is present, add summary MD and DOCXs
|
# If summary is present, add summary MD and DOCX
|
||||||
if summary_text:
|
if summary_text:
|
||||||
attachments += [md_summary_path, docx_summary_path]
|
attachments += [md_summary_path, docx_summary_path]
|
||||||
if docx_combined_path:
|
|
||||||
attachments.append(docx_combined_path)
|
|
||||||
|
|
||||||
# Add PDFs based on mode:
|
|
||||||
# - Always: TRANSCRIPT.pdf
|
|
||||||
# - If summary: SUMMARY.pdf and COMBINED.pdf
|
|
||||||
if pdf_files.get("transcript_pdf"):
|
|
||||||
attachments.append(pdf_files["transcript_pdf"])
|
|
||||||
if pdf_files.get("summary_pdf"):
|
|
||||||
attachments.append(pdf_files["summary_pdf"])
|
|
||||||
if pdf_files.get("combined_pdf"):
|
|
||||||
attachments.append(pdf_files["combined_pdf"])
|
|
||||||
|
|
||||||
# 6) Send success email
|
# 6) Send success email
|
||||||
send_success_email(
|
send_success_email(
|
||||||
|
|||||||
Reference in New Issue
Block a user