Add cover pages to the transcript and summary .docx files with AI-generated descriptions; include a combined .docx when both a transcript and a summary are requested
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 21:33:15 +00:00
parent efb34dd9ff
commit 7adca3d921
2 changed files with 194 additions and 48 deletions
+89 -3
View File
@@ -14,6 +14,7 @@ from .summarizer import SummarizerClient, SummarizerError
from .misc import setup_logging
from .email_sender import send_email, EmailError, load_template
from .email_sender import create_transcript_docx, create_summary_docx
from .email_sender import create_combined_docx
logger = logging.getLogger("scraibe.tasks")
@@ -405,6 +406,58 @@ def process_transcription_task(
)
speaker_map = {}
# 3c) Generate short cover-page descriptions using summarizer
transcript_cover_desc = ""
summary_cover_desc = ""
today_str = datetime.utcnow().strftime("%Y-%m-%d")
try:
scraibe._ensure_summarizer()
summarizer = scraibe._summarizer
# Transcript description
prompt = (
"In one short sentence (max 25 words), describe the content of this transcript "
"for use on a cover page. Do not include speaker names. Output only the sentence.\n\n"
+ transcript_text
)
response = summarizer._chat_completion(
messages=[{"role": "user", "content": prompt}],
temperature=0.3,
max_tokens=60,
)
transcript_cover_desc = (
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
).strip()
if not transcript_cover_desc:
transcript_cover_desc = "Transcript of the recorded conversation."
logger.info("Transcript cover description: %s", transcript_cover_desc)
# Summary description (if summary requested)
if include_summary:
prompt = (
"In one short sentence (max 25 words), describe the content of this summary "
"for use on a cover page. Output only the sentence.\n\n"
+ summary_text
)
response = summarizer._chat_completion(
messages=[{"role": "user", "content": prompt}],
temperature=0.3,
max_tokens=60,
)
summary_cover_desc = (
(response or {}).get("choices", [{}])[0].get("message", {}).get("content", "")
).strip()
if not summary_cover_desc:
summary_cover_desc = "Summary of the recorded conversation."
logger.info("Summary cover description: %s", summary_cover_desc)
except (SummarizerError, Exception) as e:
logger.warning("Failed to generate cover descriptions: %s", e)
transcript_cover_desc = transcript_cover_desc or "Transcript of the recorded conversation."
if include_summary:
summary_cover_desc = summary_cover_desc or "Summary of the recorded conversation."
# 4) Prepare files
# Transcript .md
@@ -414,9 +467,15 @@ def process_transcription_task(
f.write(transcript_text)
temp_files.append(md_transcript_path)
# Transcript .docx
# Transcript .docx (standalone, with cover page)
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
create_transcript_docx(transcript_text, docx_transcript_path)
create_transcript_docx(
transcript_text,
docx_transcript_path,
include_cover=True,
cover_date=today_str,
cover_desc=transcript_cover_desc,
)
temp_files.append(docx_transcript_path)
# JSON as SOURCE
@@ -440,17 +499,42 @@ def process_transcription_task(
temp_files.append(json_path)
# Summary files (if present)
md_summary_path = None
docx_summary_path = None
docx_combined_path = None
if summary_text:
# Summary .md
md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
with open(md_summary_path, "w", encoding="utf-8") as f:
f.write("# Summary\n\n")
f.write(summary_text)
temp_files.append(md_summary_path)
# Summary .docx (standalone, with cover page)
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
create_summary_docx(summary_text, docx_summary_path)
create_summary_docx(
summary_text,
docx_summary_path,
include_cover=True,
cover_date=today_str,
cover_desc=summary_cover_desc,
)
temp_files.append(docx_summary_path)
# Combined .docx: Transcript + Summary with cover pages
docx_combined_path = _safe_filename("TRANSCRIPT_AND_SUMMARY", local, date_tag, ".docx")
create_combined_docx(
transcript_text=transcript_text,
summary_text=summary_text,
filename=docx_combined_path,
transcript_cover_date=today_str,
transcript_cover_desc=transcript_cover_desc,
summary_cover_date=today_str,
summary_cover_desc=summary_cover_desc,
)
temp_files.append(docx_combined_path)
attachments = [
md_transcript_path,
docx_transcript_path,
@@ -458,6 +542,8 @@ def process_transcription_task(
]
if summary_text:
attachments += [md_summary_path, docx_summary_path]
if docx_combined_path:
attachments.append(docx_combined_path)
# 5) Send success email
send_success_email(