Add cleanup of temp and upload files after transcription job
This commit is contained in:
+33
-1
@@ -17,6 +17,19 @@ from .email_sender import create_transcript_docx, create_summary_docx
|
|||||||
logger = logging.getLogger("scraibe.tasks")
|
logger = logging.getLogger("scraibe.tasks")
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_file(path: str):
    """
    Remove a file if it exists. Best-effort; logs but never raises.

    Args:
        path: Filesystem path of the file to delete. Falsy values
            (``None`` or an empty string) are ignored.

    Returns:
        None. A missing file is treated as already cleaned up; any other
        failure is logged as a warning and swallowed.
    """
    if not path:
        return
    try:
        # Attempt the removal directly instead of checking existence first:
        # an exists()-then-remove() sequence can race with another process
        # deleting the file in between, and costs an extra stat call.
        os.remove(path)
    except FileNotFoundError:
        # Already gone: nothing left to clean up, so this is not a failure.
        return
    except Exception as e:
        # Cleanup must never break the caller; report the problem and move on.
        logger.warning("Failed to remove file %s: %s", path, e)
|
||||||
|
|
||||||
|
|
||||||
def get_queue_position(task_id: str) -> int:
|
def get_queue_position(task_id: str) -> int:
|
||||||
"""
|
"""
|
||||||
Estimate the job's position in the queue.
|
Estimate the job's position in the queue.
|
||||||
@@ -200,6 +213,7 @@ def process_transcription_task(
|
|||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Async task: transcribe audio, optionally summarize, then email results.
|
Async task: transcribe audio, optionally summarize, then email results.
|
||||||
|
Cleans up temporary files after completion.
|
||||||
"""
|
"""
|
||||||
task_id = self.request.id
|
task_id = self.request.id
|
||||||
|
|
||||||
@@ -207,6 +221,10 @@ def process_transcription_task(
|
|||||||
log_level = os.getenv("LOG_LEVEL", "INFO")
|
log_level = os.getenv("LOG_LEVEL", "INFO")
|
||||||
setup_logging(level=log_level)
|
setup_logging(level=log_level)
|
||||||
|
|
||||||
|
# Track all temporary files to clean up later
|
||||||
|
temp_files = []
|
||||||
|
|
||||||
|
try:
|
||||||
# 1) Determine queue position and send initial email
|
# 1) Determine queue position and send initial email
|
||||||
queue_pos = get_queue_position(task_id)
|
queue_pos = get_queue_position(task_id)
|
||||||
send_initial_email(to=email_to, queue_pos=queue_pos)
|
send_initial_email(to=email_to, queue_pos=queue_pos)
|
||||||
@@ -222,7 +240,6 @@ def process_transcription_task(
|
|||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
try:
|
|
||||||
# 3) Perform transcription
|
# 3) Perform transcription
|
||||||
if task_type == "transcript_and_summarize":
|
if task_type == "transcript_and_summarize":
|
||||||
result = scraibe.transcript_and_summarize(
|
result = scraibe.transcript_and_summarize(
|
||||||
@@ -258,11 +275,13 @@ def process_transcription_task(
|
|||||||
f.write("# Transcript\n\n")
|
f.write("# Transcript\n\n")
|
||||||
f.write(transcript_text)
|
f.write(transcript_text)
|
||||||
attachments.append(md_transcript_path)
|
attachments.append(md_transcript_path)
|
||||||
|
temp_files.append(md_transcript_path)
|
||||||
|
|
||||||
# Transcript as .docx
|
# Transcript as .docx
|
||||||
docx_transcript_path = tempfile.mktemp(suffix=".docx")
|
docx_transcript_path = tempfile.mktemp(suffix=".docx")
|
||||||
create_transcript_docx(transcript_text, docx_transcript_path)
|
create_transcript_docx(transcript_text, docx_transcript_path)
|
||||||
attachments.append(docx_transcript_path)
|
attachments.append(docx_transcript_path)
|
||||||
|
temp_files.append(docx_transcript_path)
|
||||||
|
|
||||||
# JSON with diarization
|
# JSON with diarization
|
||||||
json_data = {
|
json_data = {
|
||||||
@@ -285,6 +304,7 @@ def process_transcription_task(
|
|||||||
with open(json_path, "w", encoding="utf-8") as f:
|
with open(json_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
||||||
attachments.append(json_path)
|
attachments.append(json_path)
|
||||||
|
temp_files.append(json_path)
|
||||||
|
|
||||||
# Summary as .md (only when summary is available)
|
# Summary as .md (only when summary is available)
|
||||||
if summary_text:
|
if summary_text:
|
||||||
@@ -293,11 +313,13 @@ def process_transcription_task(
|
|||||||
f.write("# Summary\n\n")
|
f.write("# Summary\n\n")
|
||||||
f.write(summary_text)
|
f.write(summary_text)
|
||||||
attachments.append(md_summary_path)
|
attachments.append(md_summary_path)
|
||||||
|
temp_files.append(md_summary_path)
|
||||||
|
|
||||||
# Summary as .docx
|
# Summary as .docx
|
||||||
docx_summary_path = tempfile.mktemp(suffix=".docx")
|
docx_summary_path = tempfile.mktemp(suffix=".docx")
|
||||||
create_summary_docx(summary_text, docx_summary_path)
|
create_summary_docx(summary_text, docx_summary_path)
|
||||||
attachments.append(docx_summary_path)
|
attachments.append(docx_summary_path)
|
||||||
|
temp_files.append(docx_summary_path)
|
||||||
|
|
||||||
# 5) Send success email
|
# 5) Send success email
|
||||||
send_success_email(
|
send_success_email(
|
||||||
@@ -318,3 +340,13 @@ def process_transcription_task(
|
|||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
finally:
|
||||||
|
# 6) Cleanup temporary files (best-effort)
|
||||||
|
for path in temp_files:
|
||||||
|
_remove_file(path)
|
||||||
|
|
||||||
|
# Also remove uploaded audio file
|
||||||
|
if audio_path:
|
||||||
|
_remove_file(audio_path)
|
||||||
|
|
||||||
|
logger.info("Cleanup completed for job %s.", task_id)
|
||||||
|
|||||||
Reference in New Issue
Block a user