def _remove_file(path: str) -> None:
    """
    Remove a file if it exists. Best-effort; logs but never raises.

    The removal is attempted directly instead of being guarded by an
    ``os.path.exists`` check: the check-then-remove sequence is racy (the
    file can vanish between the two calls) and ``exists`` follows symlinks,
    so a dangling symlink would never be cleaned up.

    Args:
        path: Filesystem path of the file to delete. Falsy values
            (``None``, ``""``) are ignored.
    """
    if not path:
        return
    try:
        os.remove(path)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing to delete: the path (or one of its parents) does not
        # exist. This is the expected outcome for an already-cleaned file.
        pass
    except Exception as e:
        # Deliberately broad: this helper is called from cleanup code, where
        # raising would mask the error that is actually being handled.
        logger.warning("Failed to remove file %s: %s", path, e)
f.write(transcript_text) attachments.append(md_transcript_path) + temp_files.append(md_transcript_path) # Transcript as .docx docx_transcript_path = tempfile.mktemp(suffix=".docx") create_transcript_docx(transcript_text, docx_transcript_path) attachments.append(docx_transcript_path) + temp_files.append(docx_transcript_path) # JSON with diarization json_data = { @@ -285,6 +304,7 @@ def process_transcription_task( with open(json_path, "w", encoding="utf-8") as f: json.dump(json_data, f, indent=2, ensure_ascii=False) attachments.append(json_path) + temp_files.append(json_path) # Summary as .md (only when summary is available) if summary_text: @@ -293,11 +313,13 @@ def process_transcription_task( f.write("# Summary\n\n") f.write(summary_text) attachments.append(md_summary_path) + temp_files.append(md_summary_path) # Summary as .docx docx_summary_path = tempfile.mktemp(suffix=".docx") create_summary_docx(summary_text, docx_summary_path) attachments.append(docx_summary_path) + temp_files.append(docx_summary_path) # 5) Send success email send_success_email( @@ -318,3 +340,13 @@ def process_transcription_task( task_id=task_id, ) raise e + finally: + # 6) Cleanup temporary files (best-effort) + for path in temp_files: + _remove_file(path) + + # Also remove uploaded audio file + if audio_path: + _remove_file(audio_path) + + logger.info("Cleanup completed for job %s.", task_id)