Add cleanup of temp and upload files after transcription job
This commit is contained in:
+33
-1
@@ -17,6 +17,19 @@ from .email_sender import create_transcript_docx, create_summary_docx
|
||||
logger = logging.getLogger("scraibe.tasks")
|
||||
|
||||
|
||||
def _remove_file(path: str) -> None:
    """
    Remove a file, best-effort; logs failures but never raises.

    Args:
        path: Filesystem path to delete. A falsy value (``None`` or an
            empty string) is a no-op.

    A path that is already gone is treated as success. Any other error is
    logged as a warning and swallowed.
    """
    if not path:
        return
    try:
        # Delete directly instead of checking os.path.exists() first: the
        # check-then-remove sequence is racy and would log a spurious
        # warning if the file vanished between the two calls.
        os.remove(path)
    except FileNotFoundError:
        # Already removed -- nothing left to clean up.
        pass
    except Exception as e:
        # Deliberately broad: this helper promises never to raise.
        logger.warning("Failed to remove file %s: %s", path, e)
|
||||
|
||||
|
||||
def get_queue_position(task_id: str) -> int:
|
||||
"""
|
||||
Estimate the job's position in the queue.
|
||||
@@ -200,6 +213,7 @@ def process_transcription_task(
|
||||
):
|
||||
"""
|
||||
Async task: transcribe audio, optionally summarize, then email results.
|
||||
Cleans up temporary files after completion.
|
||||
"""
|
||||
task_id = self.request.id
|
||||
|
||||
@@ -207,6 +221,10 @@ def process_transcription_task(
|
||||
log_level = os.getenv("LOG_LEVEL", "INFO")
|
||||
setup_logging(level=log_level)
|
||||
|
||||
# Track all temporary files to clean up later
|
||||
temp_files = []
|
||||
|
||||
try:
|
||||
# 1) Determine queue position and send initial email
|
||||
queue_pos = get_queue_position(task_id)
|
||||
send_initial_email(to=email_to, queue_pos=queue_pos)
|
||||
@@ -222,7 +240,6 @@ def process_transcription_task(
|
||||
)
|
||||
raise
|
||||
|
||||
try:
|
||||
# 3) Perform transcription
|
||||
if task_type == "transcript_and_summarize":
|
||||
result = scraibe.transcript_and_summarize(
|
||||
@@ -258,11 +275,13 @@ def process_transcription_task(
|
||||
f.write("# Transcript\n\n")
|
||||
f.write(transcript_text)
|
||||
attachments.append(md_transcript_path)
|
||||
temp_files.append(md_transcript_path)
|
||||
|
||||
# Transcript as .docx
|
||||
docx_transcript_path = tempfile.mktemp(suffix=".docx")
|
||||
create_transcript_docx(transcript_text, docx_transcript_path)
|
||||
attachments.append(docx_transcript_path)
|
||||
temp_files.append(docx_transcript_path)
|
||||
|
||||
# JSON with diarization
|
||||
json_data = {
|
||||
@@ -285,6 +304,7 @@ def process_transcription_task(
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
||||
attachments.append(json_path)
|
||||
temp_files.append(json_path)
|
||||
|
||||
# Summary as .md (only when summary is available)
|
||||
if summary_text:
|
||||
@@ -293,11 +313,13 @@ def process_transcription_task(
|
||||
f.write("# Summary\n\n")
|
||||
f.write(summary_text)
|
||||
attachments.append(md_summary_path)
|
||||
temp_files.append(md_summary_path)
|
||||
|
||||
# Summary as .docx
|
||||
docx_summary_path = tempfile.mktemp(suffix=".docx")
|
||||
create_summary_docx(summary_text, docx_summary_path)
|
||||
attachments.append(docx_summary_path)
|
||||
temp_files.append(docx_summary_path)
|
||||
|
||||
# 5) Send success email
|
||||
send_success_email(
|
||||
@@ -318,3 +340,13 @@ def process_transcription_task(
|
||||
task_id=task_id,
|
||||
)
|
||||
raise e
|
||||
finally:
|
||||
# 6) Cleanup temporary files (best-effort)
|
||||
for path in temp_files:
|
||||
_remove_file(path)
|
||||
|
||||
# Also remove uploaded audio file
|
||||
if audio_path:
|
||||
_remove_file(audio_path)
|
||||
|
||||
logger.info("Cleanup completed for job %s.", task_id)
|
||||
|
||||
Reference in New Issue
Block a user