Revert PDF generation; simplify to DOCX + MD + JSON only
- Remove PDF helpers, LibreOffice, PyPDF2, reportlab
- Transcript DOCX: standalone, no cover page, with line numbering
- Summary DOCX: standalone, no cover page, no line numbering
- Attachments:
  - Transcribe: JSON, transcript MD, transcript DOCX
  - Transcribe & Summarize: JSON, transcript MD, transcript DOCX, summary MD, summary DOCX
This commit is contained in:
+5
-114
@@ -14,7 +14,6 @@ from .summarizer import SummarizerClient, SummarizerError
|
||||
from .misc import setup_logging
|
||||
from .email_sender import send_email, EmailError, load_template
|
||||
from .email_sender import create_transcript_docx, create_summary_docx
|
||||
from .email_sender import create_combined_docx, generate_pdf_documents
|
||||
|
||||
logger = logging.getLogger("scraibe.tasks")
|
||||
|
||||
@@ -93,7 +92,7 @@ def get_queue_position(task_id: str) -> int:
|
||||
if found:
|
||||
break
|
||||
|
||||
# If not found in reserved, it may already be active or not yet visible.
|
||||
# If not found in reserved, it may already be active or not yet visible.
|
||||
# In that case, treat it as position 1.
|
||||
if found:
|
||||
return max(ahead + 1, 1)
|
||||
@@ -415,59 +414,6 @@ def process_transcription_task(
|
||||
)
|
||||
speaker_map = {}
|
||||
|
||||
# 3c) Generate short cover-page descriptions using summarizer
|
||||
transcript_cover_desc = ""
|
||||
summary_cover_desc = ""
|
||||
dt = datetime.utcnow()
|
||||
today_str = f"{dt.strftime('%B')} {dt.day}, {dt.year}"
|
||||
|
||||
try:
|
||||
scraibe._ensure_summarizer()
|
||||
summarizer = scraibe._summarizer
|
||||
|
||||
# Transcript description
|
||||
prompt = (
|
||||
"In one short sentence (max 25 words), describe the content of this transcript "
|
||||
"for use on a cover page. Do not include speaker names. Output only the sentence.\n\n"
|
||||
+ transcript_text
|
||||
)
|
||||
response = summarizer._chat_completion(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.3,
|
||||
max_tokens=60,
|
||||
)
|
||||
transcript_cover_desc = (
|
||||
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
).strip()
|
||||
if not transcript_cover_desc:
|
||||
transcript_cover_desc = "Transcript of the recorded conversation."
|
||||
logger.info("Transcript cover description: %s", transcript_cover_desc)
|
||||
|
||||
# Summary description (if summary requested)
|
||||
if include_summary:
|
||||
prompt = (
|
||||
"In one short sentence (max 25 words), describe the content of this summary "
|
||||
"for use on a cover page. Output only the sentence.\n\n"
|
||||
+ summary_text
|
||||
)
|
||||
response = summarizer._chat_completion(
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0.3,
|
||||
max_tokens=60,
|
||||
)
|
||||
summary_cover_desc = (
|
||||
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
).strip()
|
||||
if not summary_cover_desc:
|
||||
summary_cover_desc = "Summary of the recorded conversation."
|
||||
logger.info("Summary cover description: %s", summary_cover_desc)
|
||||
|
||||
except (SummarizerError, Exception) as e:
|
||||
logger.warning("Failed to generate cover descriptions: %s", e)
|
||||
transcript_cover_desc = transcript_cover_desc or "Transcript of the recorded conversation."
|
||||
if include_summary:
|
||||
summary_cover_desc = summary_cover_desc or "Summary of the recorded conversation."
|
||||
|
||||
# 4) Prepare files
|
||||
|
||||
# Transcript .md
|
||||
@@ -477,14 +423,11 @@ def process_transcription_task(
|
||||
f.write(transcript_text)
|
||||
temp_files.append(md_transcript_path)
|
||||
|
||||
# Transcript .docx (standalone, with cover page)
|
||||
# Transcript .docx (standalone, no cover page)
|
||||
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
||||
create_transcript_docx(
|
||||
transcript_text,
|
||||
docx_transcript_path,
|
||||
include_cover=True,
|
||||
cover_date=today_str,
|
||||
cover_desc=transcript_cover_desc,
|
||||
)
|
||||
temp_files.append(docx_transcript_path)
|
||||
|
||||
@@ -511,7 +454,6 @@ def process_transcription_task(
|
||||
# Summary files (if present)
|
||||
md_summary_path = None
|
||||
docx_summary_path = None
|
||||
docx_combined_path = None
|
||||
|
||||
if summary_text:
|
||||
# Summary .md
|
||||
@@ -521,77 +463,26 @@ def process_transcription_task(
|
||||
f.write(summary_text)
|
||||
temp_files.append(md_summary_path)
|
||||
|
||||
# Summary .docx (standalone, with cover page)
|
||||
# Summary .docx (standalone, no cover page)
|
||||
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
||||
create_summary_docx(
|
||||
summary_text,
|
||||
docx_summary_path,
|
||||
include_cover=True,
|
||||
cover_date=today_str,
|
||||
cover_desc=summary_cover_desc,
|
||||
)
|
||||
temp_files.append(docx_summary_path)
|
||||
|
||||
# Combined .docx: Transcript + Summary with cover pages
|
||||
docx_combined_path = _safe_filename("TRANSCRIPT_AND_SUMMARY", local, date_tag, ".docx")
|
||||
create_combined_docx(
|
||||
transcript_text=transcript_text,
|
||||
summary_text=summary_text,
|
||||
filename=docx_combined_path,
|
||||
transcript_cover_date=today_str,
|
||||
transcript_cover_desc=transcript_cover_desc,
|
||||
summary_cover_date=today_str,
|
||||
summary_cover_desc=summary_cover_desc,
|
||||
)
|
||||
temp_files.append(docx_combined_path)
|
||||
|
||||
# 4b) Generate PDF documents
|
||||
|
||||
# Always use transcript text; use summary_text only if include_summary is True
|
||||
pdf_summary_text = summary_text if include_summary else ""
|
||||
|
||||
pdf_files = {}
|
||||
try:
|
||||
pdf_output_dir = tempfile.mkdtemp(prefix="pdf_output_")
|
||||
pdf_files = generate_pdf_documents(
|
||||
transcript_text=transcript_text,
|
||||
summary_text=pdf_summary_text,
|
||||
output_dir=pdf_output_dir,
|
||||
transcript_cover_date=today_str,
|
||||
transcript_cover_desc=transcript_cover_desc,
|
||||
summary_cover_date=today_str,
|
||||
summary_cover_desc=summary_cover_desc,
|
||||
)
|
||||
for pdf_path in pdf_files.values():
|
||||
if pdf_path and os.path.exists(pdf_path):
|
||||
temp_files.append(pdf_path)
|
||||
except Exception as e:
|
||||
logger.warning("PDF generation failed (continuing with DOCX attachments): %s", e)
|
||||
|
||||
# 5) Build attachments list
|
||||
|
||||
# Base attachments: JSON, transcript MD, transcript DOCX
|
||||
# Always: JSON, transcript MD, transcript DOCX
|
||||
attachments = [
|
||||
md_transcript_path,
|
||||
docx_transcript_path,
|
||||
json_path,
|
||||
]
|
||||
|
||||
# If summary is present, add summary MD and DOCXs
|
||||
# If summary is present, add summary MD and DOCX
|
||||
if summary_text:
|
||||
attachments += [md_summary_path, docx_summary_path]
|
||||
if docx_combined_path:
|
||||
attachments.append(docx_combined_path)
|
||||
|
||||
# Add PDFs based on mode:
|
||||
# - Always: TRANSCRIPT.pdf
|
||||
# - If summary: SUMMARY.pdf and COMBINED.pdf
|
||||
if pdf_files.get("transcript_pdf"):
|
||||
attachments.append(pdf_files["transcript_pdf"])
|
||||
if pdf_files.get("summary_pdf"):
|
||||
attachments.append(pdf_files["summary_pdf"])
|
||||
if pdf_files.get("combined_pdf"):
|
||||
attachments.append(pdf_files["combined_pdf"])
|
||||
|
||||
# 6) Send success email
|
||||
send_success_email(
|
||||
|
||||
Reference in New Issue
Block a user