bdd0a80d8d
- New watcher.py: polls WATCH_DIR, enqueues transcription+summary via Celery. - New process_watch_file_task in tasks.py. - Updated __main__.py: WebUI always runs; MCP and watcher run in parallel when enabled.
101 lines
2.6 KiB
Python
101 lines
2.6 KiB
Python
"""
|
|
Watch-folder mode for ScrAIbe.

Monitors a folder for audio files. For each file:
- Transcribes + summarizes
- Emails results
- Deletes source file

Configuration (env):
- WATCH_ENABLED: "true"/"1"/"yes" to enable, anything else disables (default: false)
- WATCH_DIR: directory to watch (required if enabled)
- WATCH_EMAIL_TO: destination email (required if enabled)
- WATCH_POLL_INTERVAL: seconds between scans (default: 10)
- WATCH_DELETE_ON_SUCCESS: "true"/"false" (default: true)
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
import logging
|
|
import threading
|
|
from pathlib import Path
|
|
|
|
# Module-level logger shared by every helper in this file.
logger = logging.getLogger("scraibe.watcher")

# Lower-case file suffixes (including the leading dot) that the watcher
# treats as audio input.
AUDIO_EXTENSIONS = {
    # Uncompressed / lossless
    ".wav", ".flac",
    # Lossy audio
    ".mp3", ".m4a", ".ogg",
    # Containers that may carry an audio track
    ".webm", ".mp4",
}
|
|
|
|
|
|
def _is_audio(path: Path) -> bool:
|
|
return path.is_file() and path.suffix.lower() in AUDIO_EXTENSIONS
|
|
|
|
|
|
# Signature (st_mtime_ns, st_size) of every file already handed to Celery,
# keyed by its path. The source file stays in WATCH_DIR until its task has
# finished, so without this record each poll would enqueue the same file again.
_enqueued_signatures = {}


def _enqueue_file(file_path: Path) -> bool:
    """
    Enqueue a file for transcription + summarization via Celery.

    Args:
        file_path: Path of the audio file; it is passed to the task as a string.

    Returns:
        True if the task was submitted, False if submission raised (the error
        is logged, not propagated).
    """
    # Imported lazily, at call time rather than at module import.
    from .tasks import process_watch_file_task

    try:
        process_watch_file_task.delay(str(file_path))
    except Exception as e:
        # Best effort: a failed submission must not abort the rest of the scan.
        logger.exception("Failed to enqueue watch file %s: %s", file_path, e)
        return False
    return True


def _scan_directory(watch_dir: Path):
    """
    Scan directory and enqueue each audio file that has not been enqueued yet.

    A file is identified by its path plus (mtime, size). An unchanged file that
    was already submitted is skipped on later scans; a file that was replaced
    or rewritten is submitted again. Files whose submission failed are not
    recorded, so they are retried on the next scan. Records for files that no
    longer exist are dropped.

    Args:
        watch_dir: Directory to scan (non-recursive). If it is missing or not a
            directory a warning is logged and nothing else happens.
    """
    if not watch_dir.is_dir():
        logger.warning("WATCH_DIR does not exist or is not a directory: %s", watch_dir)
        return

    present = set()
    for p in watch_dir.iterdir():
        if not _is_audio(p):
            continue
        try:
            info = p.stat()
        except OSError:
            # The file vanished between listing and stat; nothing to enqueue.
            continue

        present.add(p)
        signature = (info.st_mtime_ns, info.st_size)
        if _enqueued_signatures.get(p) == signature:
            # Already submitted and unchanged since then.
            continue

        logger.info("Found audio file in WATCH_DIR: %s", p)
        if _enqueue_file(p):
            _enqueued_signatures[p] = signature

    # Forget files that are gone so a later file with the same name is picked up.
    stale = [
        known
        for known in _enqueued_signatures
        if known.parent == watch_dir and known not in present
    ]
    for known in stale:
        del _enqueued_signatures[known]
|
|
|
|
|
|
def start_watcher():
    """
    Start watch-folder loop in a background thread.

    Configuration is read from the environment:
    - WATCH_ENABLED: "true"/"1"/"yes" enables the watcher; otherwise this is a no-op.
    - WATCH_DIR: directory to watch; created if it does not exist.
    - WATCH_EMAIL_TO: must be set; it is only validated and logged here.
    - WATCH_POLL_INTERVAL: seconds between scans. A value that is not a
      positive, finite number falls back to 10 seconds with a warning.

    If a required setting is missing or WATCH_DIR cannot be created, the
    problem is logged and the watcher is not started. On success a daemon
    thread named "scraibe-watcher" scans the directory forever.

    Returns:
        None in every case.
    """
    enabled = os.getenv("WATCH_ENABLED", "false").strip().lower() in ("true", "1", "yes")
    if not enabled:
        return

    watch_dir = os.getenv("WATCH_DIR")
    if not watch_dir:
        logger.warning("WATCH_ENABLED is true but WATCH_DIR is not set. Watcher disabled.")
        return

    email_to = os.getenv("WATCH_EMAIL_TO")
    if not email_to:
        logger.warning("WATCH_ENABLED is true but WATCH_EMAIL_TO is not set. Watcher disabled.")
        return

    default_interval = 10.0
    raw_interval = os.getenv("WATCH_POLL_INTERVAL", "10")
    try:
        interval = float(raw_interval)
    except ValueError:
        interval = float("nan")
    # Rejects NaN, infinities, zero and negatives in one comparison: zero would
    # busy-loop, and time.sleep() raises on negative or NaN values, which
    # would kill the watcher thread.
    if not 0 < interval < float("inf"):
        logger.warning(
            "Invalid WATCH_POLL_INTERVAL %r; falling back to %s seconds.",
            raw_interval,
            default_interval,
        )
        interval = default_interval

    watch_path = Path(watch_dir).expanduser().resolve()
    try:
        watch_path.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # Same policy as the other misconfiguration cases: report and disable
        # instead of raising into the caller.
        logger.error("Cannot create WATCH_DIR %s: %s. Watcher disabled.", watch_path, e)
        return

    logger.info("Starting watch-folder: dir=%s, email=%s, interval=%s", watch_path, email_to, interval)

    def _loop():
        while True:
            try:
                _scan_directory(watch_path)
            except Exception as e:
                # Keep the watcher alive across scan failures; log with traceback.
                logger.exception("Error scanning WATCH_DIR: %s", e)
            time.sleep(interval)

    t = threading.Thread(target=_loop, name="scraibe-watcher", daemon=True)
    t.start()
|