Add watch-folder mode and wire MCP/watcher into entrypoint
Mirror and run GitLab CI / build (push) Waiting to run
Ruff / ruff (push) Waiting to run

- New watcher.py: polls WATCH_DIR, enqueues transcription+summary via Celery.
- New process_watch_file_task in tasks.py.
- Updated __main__.py: WebUI always runs; MCP and watcher run in parallel when enabled.
This commit is contained in:
admin
2026-06-19 17:18:20 +00:00
parent 7a31be9de5
commit bdd0a80d8d
3 changed files with 249 additions and 1 deletions
+142
View File
@@ -569,3 +569,145 @@ def process_mcp_transcribe_task(
finally:
_remove_file(audio_path)
logger.info("MCP job %s cleanup completed.", job_id)
@celery_app.task(
    name="scraibe.tasks.process_watch_file_task",
    bind=True,
    max_retries=1,
    # Per-task limits are spelled ``time_limit`` / ``soft_time_limit``.
    # The ``task_``-prefixed spellings are global configuration keys and
    # are not honoured when passed to the task decorator.
    time_limit=14400,  # hard limit: 4 h
    soft_time_limit=13500,  # soft limit: 3 h 45 min
)
def process_watch_file_task(
    self,
    file_path: str,
) -> None:
    """Transcribe and summarize one watch-folder file and email the results.

    Steps:
      1. Run ``Scraibe.transcript_and_summarize`` on ``file_path``.
      2. Write the transcript and summary as ``.md`` and ``.docx`` files and
         the combined data as a ``.json`` file.
      3. Send the five files as attachments via ``send_success_email``.
      4. Remove the source file when ``WATCH_DELETE_ON_SUCCESS`` is truthy
         (``true``/``1``/``yes``, case-insensitive; defaults to ``true``).

    The generated files are always removed in the ``finally`` clause.

    Environment variables read:
      - ``LOG_LEVEL`` (default ``INFO``)
      - ``WATCH_EMAIL_TO``, falling back to ``EMAIL_DEFAULT_TO``
      - ``WATCH_DELETE_ON_SUCCESS``

    Args:
        file_path: Path of the audio file to process.

    Raises:
        RuntimeError: If neither ``WATCH_EMAIL_TO`` nor ``EMAIL_DEFAULT_TO``
            is set.
        Exception: Any error raised during processing is re-raised after an
            error email has been attempted.
    """
    task_id = self.request.id
    log_level = os.getenv("LOG_LEVEL", "INFO")
    setup_logging(level=log_level)

    email_to = os.getenv("WATCH_EMAIL_TO") or os.getenv("EMAIL_DEFAULT_TO")
    if not email_to:
        logger.error("No email address configured for watch-folder mode.")
        raise RuntimeError("WATCH_EMAIL_TO or EMAIL_DEFAULT_TO not set.")

    delete_on_success = os.getenv(
        "WATCH_DELETE_ON_SUCCESS", "true"
    ).strip().lower() in ("true", "1", "yes")

    temp_files = []
    local = "watch"
    date_tag = _date_tag()

    try:
        scraibe = Scraibe(verbose=True)
        result = scraibe.transcript_and_summarize(
            audio_file=file_path,
            language=None,
            num_speakers=None,
            verbose=True,
            for_export=True,
        )
        transcript_text = result.get("transcript", "")
        summary_text = result.get("summary", "")
        segments = result.get("segments", [])
        raw_result = result.get("raw_result")

        # Transcript .md
        md_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".md")
        with open(md_transcript_path, "w", encoding="utf-8") as f:
            f.write("# Transcript\n\n")
            f.write(transcript_text)
        temp_files.append(md_transcript_path)

        # Transcript .docx
        docx_transcript_path = _safe_filename("TRANSCRIPT", local, date_tag, ".docx")
        create_transcript_docx(
            transcript_text,
            docx_transcript_path,
        )
        temp_files.append(docx_transcript_path)

        # Summary .md
        md_summary_path = _safe_filename("SUMMARY", local, date_tag, ".md")
        with open(md_summary_path, "w", encoding="utf-8") as f:
            f.write("# Summary\n\n")
            f.write(summary_text)
        temp_files.append(md_summary_path)

        # Summary .docx
        docx_summary_path = _safe_filename("SUMMARY", local, date_tag, ".docx")
        create_summary_docx(
            summary_text,
            docx_summary_path,
        )
        temp_files.append(docx_summary_path)

        # JSON as SOURCE
        json_data = {
            "task": "watch_transcript_and_summarize",
            "transcript": transcript_text,
            "summary": summary_text,
            "segments": segments,
            "metadata": {
                "timestamp": datetime.utcnow().isoformat(),
                "job_id": task_id,
                "source_file": file_path,
            },
        }
        if raw_result is not None:
            json_data["raw_result"] = raw_result

        json_path = _safe_filename("SOURCE", local, date_tag, ".json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(json_data, f, indent=2, ensure_ascii=False)
        temp_files.append(json_path)

        # Attachments
        attachments = [
            md_transcript_path,
            docx_transcript_path,
            md_summary_path,
            docx_summary_path,
            json_path,
        ]

        # Send email
        send_success_email(
            to=email_to,
            transcript_text=transcript_text,
            summary_text=summary_text,
            attachments=attachments,
            task_id=task_id,
        )
        logger.info("Watch-folder job %s completed for %s.", task_id, file_path)

        # Delete source file if configured. Best effort: the results have
        # already been delivered, so a failed delete is only logged.
        if delete_on_success and os.path.exists(file_path):
            try:
                os.remove(file_path)
                logger.info("Deleted source file: %s", file_path)
            except OSError as e:
                logger.warning("Failed to delete source file %s: %s", file_path, e)
    except Exception as e:
        logger.error("Error processing watch file %s: %s", file_path, e, exc_info=True)
        # A failure while sending the notification must not replace the
        # original processing error, so it is logged and swallowed here.
        try:
            send_error_email(
                to=email_to,
                error_message=str(e),
                task_id=task_id,
            )
        except Exception:
            logger.exception(
                "Failed to send error email for watch-folder job %s.", task_id
            )
        raise
    finally:
        # Cleanup temp files
        for path in temp_files:
            _remove_file(path)
        logger.info("Watch-folder job %s cleanup completed.", task_id)