Use structured filenames and formal DOCX transcript styling
Mirror and run GitLab CI / build (push) Has been cancelled
Ruff / ruff (push) Has been cancelled

This commit is contained in:
admin
2026-06-14 16:20:10 +00:00
parent 2dce9b43c9
commit a8f48b9e58
2 changed files with 121 additions and 27 deletions
+50 -14
View File
@@ -17,6 +17,32 @@ from .email_sender import create_transcript_docx, create_summary_docx
logger = logging.getLogger("scraibe.tasks")
def _local_part(email: str) -> str:
"""
Extract the part before '@' from an email, sanitized for filenames.
"""
local = (email or "").split("@")[0].strip()
local = "".join(ch if ch.isalnum() or ch in ("-", "_", ".") else "_" for ch in local)
return local or "user"
def _date_tag() -> str:
"""
Date tag in DD-MON-YYYY format (e.g. 01-JAN-2025).
"""
return datetime.utcnow().strftime("%d-%b-%Y").upper()
def _safe_filename(base: str, local: str, date_tag: str, ext: str) -> str:
"""
Create a temp file with the requested logical name.
Uses mktemp for uniqueness but keeps the desired name pattern.
"""
name = f"{base}-{local}-{date_tag}{ext}"
# Ensure uniqueness while preserving the logical name pattern
return tempfile.mktemp(prefix=name.replace(".", ""), suffix=ext)
def _remove_file(path: str):
"""
Remove a file if it exists. Best-effort; logs but never raises.
@@ -224,6 +250,10 @@ def process_transcription_task(
# Track all temporary files to clean up later
temp_files = []
# Derive naming components
local = _local_part(email_to)
date_tag = _date_tag()
try:
# 1) Determine queue position and send initial email
queue_pos = get_queue_position(task_id)
@@ -266,24 +296,21 @@ def process_transcription_task(
segments = result.get("segments", [])
raw_result = result.get("raw_result")
# 4) Prepare files for email
attachments = []
# 4) Prepare files for email with required naming
# Transcript as .md
md_transcript_path = tempfile.mktemp(suffix=".md")
md_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".md")
with open(md_transcript_path, "w", encoding="utf-8") as f:
f.write("# Transcript\n\n")
f.write(transcript_text)
attachments.append(md_transcript_path)
temp_files.append(md_transcript_path)
# Transcript as .docx
docx_transcript_path = tempfile.mktemp(suffix=".docx")
# Transcript as .docx (with required style)
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
create_transcript_docx(transcript_text, docx_transcript_path)
attachments.append(docx_transcript_path)
temp_files.append(docx_transcript_path)
# JSON with diarization
# JSON with diarization as SOURCE
json_data = {
"task": task_type,
"transcript": transcript_text,
@@ -300,27 +327,36 @@ def process_transcription_task(
if raw_result is not None:
json_data["raw_result"] = raw_result
json_path = tempfile.mktemp(suffix=".json")
json_path = _safe_filename("SOURCE", local, date_tag, ".json")
with open(json_path, "w", encoding="utf-8") as f:
json.dump(json_data, f, indent=2, ensure_ascii=False)
attachments.append(json_path)
temp_files.append(json_path)
# Summary as .md (only when summary is available)
if summary_text:
md_summary_path = tempfile.mktemp(suffix=".md")
md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
with open(md_summary_path, "w", encoding="utf-8") as f:
f.write("# Summary\n\n")
f.write(summary_text)
attachments.append(md_summary_path)
temp_files.append(md_summary_path)
# Summary as .docx
docx_summary_path = tempfile.mktemp(suffix=".docx")
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
create_summary_docx(summary_text, docx_summary_path)
attachments.append(docx_summary_path)
temp_files.append(docx_summary_path)
# All attachments
attachments = [
md_transcript_path,
docx_transcript_path,
json_path,
]
if summary_text:
attachments += [
md_summary_path,
docx_summary_path,
]
# 5) Send success email
send_success_email(
to=email_to,