Use structured filenames and formal DOCX transcript styling
This commit is contained in:
+50
-14
@@ -17,6 +17,32 @@ from .email_sender import create_transcript_docx, create_summary_docx
|
||||
logger = logging.getLogger("scraibe.tasks")
|
||||
|
||||
|
||||
def _local_part(email: str) -> str:
    """
    Return the filename-safe local part (text before the first '@') of an email.

    Surrounding whitespace is stripped. Every character that is neither
    alphanumeric nor one of '-', '_', '.' is replaced with '_'. When the
    input is empty/None or nothing remains, the placeholder "user" is returned.
    """
    if not email:
        return "user"

    before_at, _, _ = email.partition("@")

    cleaned = []
    for char in before_at.strip():
        keep = char.isalnum() or char in "-_."
        cleaned.append(char if keep else "_")

    return "".join(cleaned) or "user"
|
||||
|
||||
|
||||
def _date_tag() -> str:
    """
    Return the current UTC date as a DD-MON-YYYY tag (e.g. 01-JAN-2025).

    The month abbreviation comes from a fixed English table instead of
    strftime("%b"), so the tag stays the same regardless of the process's
    LC_TIME locale.
    """
    # Imported locally: this block only relies on `datetime` being in module scope.
    from datetime import timezone

    months = (
        "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
        "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
    )

    # datetime.utcnow() is deprecated since Python 3.12; a timezone-aware
    # "now" in UTC yields the same calendar date.
    today = datetime.now(timezone.utc)
    return f"{today.day:02d}-{months[today.month - 1]}-{today.year:04d}"
|
||||
|
||||
|
||||
def _safe_filename(base: str, local: str, date_tag: str, ext: str) -> str:
    """
    Create a uniquely named temp file that follows the logical name pattern.

    The returned path has the form
    ``<tmpdir>/<base>-<local>-<date_tag>-<random><ext>``.

    The file is created empty by ``tempfile.mkstemp`` (atomically, so no other
    process can claim the name first) and its descriptor is closed right away.
    The caller is expected to overwrite the file at the returned path and is
    responsible for deleting it afterwards.

    Args:
        base: Leading label of the file name (e.g. "TRANSCRIPT").
        local: Sanitized recipient identifier placed after the label.
        date_tag: Date component placed after the identifier.
        ext: File extension including the leading dot (e.g. ".md").

    Returns:
        Absolute path of the newly created empty file.
    """
    # Imported locally so the block does not depend on a module-level `os` import.
    import os

    # The extension is passed only as the suffix. Building the prefix from a
    # name that already contains it would leak the extension letters into the
    # middle of the file name.
    prefix = f"{base}-{local}-{date_tag}-"

    # mkstemp replaces the deprecated, race-prone mktemp.
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=ext)
    os.close(fd)
    return path
|
||||
|
||||
|
||||
def _remove_file(path: str):
|
||||
"""
|
||||
Remove a file if it exists. Best-effort; logs but never raises.
|
||||
@@ -224,6 +250,10 @@ def process_transcription_task(
|
||||
# Track all temporary files to clean up later
|
||||
temp_files = []
|
||||
|
||||
# Derive naming components
|
||||
local = _local_part(email_to)
|
||||
date_tag = _date_tag()
|
||||
|
||||
try:
|
||||
# 1) Determine queue position and send initial email
|
||||
queue_pos = get_queue_position(task_id)
|
||||
@@ -266,24 +296,21 @@ def process_transcription_task(
|
||||
segments = result.get("segments", [])
|
||||
raw_result = result.get("raw_result")
|
||||
|
||||
# 4) Prepare files for email
|
||||
attachments = []
|
||||
# 4) Prepare files for email with required naming
|
||||
|
||||
# Transcript as .md
|
||||
md_transcript_path = tempfile.mktemp(suffix=".md")
|
||||
md_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".md")
|
||||
with open(md_transcript_path, "w", encoding="utf-8") as f:
|
||||
f.write("# Transcript\n\n")
|
||||
f.write(transcript_text)
|
||||
attachments.append(md_transcript_path)
|
||||
temp_files.append(md_transcript_path)
|
||||
|
||||
# Transcript as .docx
|
||||
docx_transcript_path = tempfile.mktemp(suffix=".docx")
|
||||
# Transcript as .docx (with required style)
|
||||
docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
|
||||
create_transcript_docx(transcript_text, docx_transcript_path)
|
||||
attachments.append(docx_transcript_path)
|
||||
temp_files.append(docx_transcript_path)
|
||||
|
||||
# JSON with diarization
|
||||
# JSON with diarization as SOURCE
|
||||
json_data = {
|
||||
"task": task_type,
|
||||
"transcript": transcript_text,
|
||||
@@ -300,27 +327,36 @@ def process_transcription_task(
|
||||
if raw_result is not None:
|
||||
json_data["raw_result"] = raw_result
|
||||
|
||||
json_path = tempfile.mktemp(suffix=".json")
|
||||
json_path = _safe_filename("SOURCE", local, date_tag, ".json")
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
||||
attachments.append(json_path)
|
||||
temp_files.append(json_path)
|
||||
|
||||
# Summary as .md (only when summary is available)
|
||||
if summary_text:
|
||||
md_summary_path = tempfile.mktemp(suffix=".md")
|
||||
md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
|
||||
with open(md_summary_path, "w", encoding="utf-8") as f:
|
||||
f.write("# Summary\n\n")
|
||||
f.write(summary_text)
|
||||
attachments.append(md_summary_path)
|
||||
temp_files.append(md_summary_path)
|
||||
|
||||
# Summary as .docx
|
||||
docx_summary_path = tempfile.mktemp(suffix=".docx")
|
||||
docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
|
||||
create_summary_docx(summary_text, docx_summary_path)
|
||||
attachments.append(docx_summary_path)
|
||||
temp_files.append(docx_summary_path)
|
||||
|
||||
# All attachments
|
||||
attachments = [
|
||||
md_transcript_path,
|
||||
docx_transcript_path,
|
||||
json_path,
|
||||
]
|
||||
if summary_text:
|
||||
attachments += [
|
||||
md_summary_path,
|
||||
docx_summary_path,
|
||||
]
|
||||
|
||||
# 5) Send success email
|
||||
send_success_email(
|
||||
to=email_to,
|
||||
|
||||
Reference in New Issue
Block a user