Ensure Docker container always starts Web GUI (not CLI)

2026-06-14 05:11:38 +00:00
parent d854d498cd
commit 37d30e0ee2
4 changed files with 292 additions and 2 deletions
@@ -0,0 +1,275 @@
+"""
+ScrAIbe Web GUI (Gradio)
+------------------------
+
+Runs the Web GUI that:
+- Accepts audio uploads
+- Sends audio to LocalAI for transcription + diarization
+- Optionally sends transcript to a second LLM for summarization
+- Returns transcript (and summary) in the browser
+
+This is the default entrypoint when running in Docker.
+"""
+
+import os
+import logging
+import tempfile
+
+import gradio as gr
+
+from .autotranscript import Scraibe
+from .misc import setup_logging
+
+# Module-level logger for the Web GUI, registered under the name "scraibe.webui".
+logger = logging.getLogger("scraibe.webui")
+
+
+def load_config():
+    """
+    Load configuration from misc/config.yaml if present.
+    Primary runtime configuration is via environment variables.
+    """
+    config_path = os.getenv("SCRAIBE_CONFIG", "/app/src/misc/config.yaml")
+    config = {}
+    if os.path.exists(config_path):
+        try:
+            import yaml
+            with open(config_path, "r", encoding="utf-8") as f:
+                config = yaml.safe_load(f) or {}
+        except Exception as e:
+            logger.warning("Failed to load config from %s: %s", config_path, e)
+    return config
+
+
+def create_app():
+    """
+    Create and launch the Gradio Web GUI.
+    """
+
+    # Logging
+    log_level = os.getenv("LOG_LEVEL", "INFO")
+    setup_logging(level=log_level)
+
+    # Load config (branding, layout, etc.)
+    config = load_config()
+    layout_cfg = config.get("layout", {})
+    launch_cfg = config.get("launch", {})
+
+    logger.info("Starting ScrAIbe Web GUI.")
+
+    # Initialize Scraibe (LocalAI-backed)
+    # If LocalAI is unreachable at startup, still launch the UI
+    # and let individual transcription calls fail with a clear message.
+    scraibe = None
+    try:
+        scraibe = Scraibe(verbose=True)
+    except Exception as e:
+        logger.warning(
+            "Failed to initialize Scraibe at startup (LocalAI may be down): %s. "
+            "Web GUI will start; transcription will fail until LocalAI is reachable.",
+            e,
+        )
+
+    # Helper: run transcription via LocalAI API
+    def run_transcribe(audio_path, task, language, num_speakers):
+        if not audio_path:
+            raise ValueError("No audio file provided.")
+
+        try:
+            if task == "transcript_and_summarize":
+                result = scraibe.transcript_and_summarize(
+                    audio_file=audio_path,
+                    language=language or None,
+                    num_speakers=int(num_speakers) if num_speakers else None,
+                    verbose=True,
+                )
+                transcript_text = result.get("transcript", "")
+                summary_text = result.get("summary", "")
+
+                # Save as .md
+                md_path = tempfile.mktemp(suffix=".md")
+                with open(md_path, "w", encoding="utf-8") as f:
+                    f.write("# Transcript\n\n")
+                    f.write(transcript_text)
+                    f.write("\n\n# Summary\n\n")
+                    f.write(summary_text)
+
+                return (
+                    transcript_text,
+                    summary_text,
+                    md_path,
+                    "Transcription and summarization completed.",
+                )
+            else:
+                # Default: transcribe only
+                text = scraibe.transcribe(
+                    audio_file=audio_path,
+                    language=language or None,
+                    num_speakers=int(num_speakers) if num_speakers else None,
+                    verbose=True,
+                )
+
+                # Save as .txt
+                txt_path = tempfile.mktemp(suffix=".txt")
+                with open(txt_path, "w", encoding="utf-8") as f:
+                    f.write(text)
+
+                return (
+                    text,
+                    "",
+                    txt_path,
+                    "Transcription completed.",
+                )
+        except Exception as e:
+            logger.error("Error during transcription: %s", e)
+            return (
+                "",
+                "",
+                None,
+                f"Error: {e}",
+            )
+
+    # Load header/footer HTML if present
+    header_path = layout_cfg.get("header", "/app/src/misc/header.html")
+    footer_path = layout_cfg.get("footer", "/app/src/misc/footer.html")
+
+    header_html = ""
+    footer_html = ""
+
+    if header_path and os.path.exists(header_path):
+        with open(header_path, "r", encoding="utf-8") as f:
+            header_html = f.read()
+
+    if footer_path and os.path.exists(footer_path):
+        with open(footer_path, "r", encoding="utf-8") as f:
+            footer_html = f.read()
+
+    # Build Gradio interface
+    with gr.Blocks(
+        title="A.P.Strom Transcription",
+        css="body { font-family: Arial, sans-serif; }",
+    ) as app:
+
+        # Header
+        if header_html:
+            gr.HTML(header_html)
+
+        with gr.Row():
+            with gr.Column(scale=2):
+                audio_input = gr.Audio(
+                    label="Upload or record audio",
+                    type="filepath",
+                )
+
+                with gr.Row():
+                    task_choice = gr.Radio(
+                        choices=[
+                            ("Transcribe", "transcribe"),
+                            ("Transcript & Summarize", "transcript_and_summarize"),
+                        ],
+                        value="transcribe",
+                        label="Task",
+                    )
+
+                with gr.Row():
+                    language_input = gr.Textbox(
+                        label="Language (optional)",
+                        placeholder="e.g., english, german",
+                    )
+                    num_speakers_input = gr.Number(
+                        label="Number of speakers (optional)",
+                        precision=0,
+                    )
+
+                transcribe_btn = gr.Button("Start", variant="primary")
+
+            with gr.Column(scale=3):
+                output_text = gr.Textbox(
+                    label="Transcript",
+                    lines=10,
+                    interactive=False,
+                )
+                summary_text = gr.Textbox(
+                    label="Summary",
+                    lines=10,
+                    interactive=False,
+                    visible=False,
+                )
+                file_output = gr.File(
+                    label="Download transcript/summary",
+                )
+                status_text = gr.Textbox(
+                    label="Status",
+                    interactive=False,
+                )
+
+        # Footer
+        if footer_html:
+            gr.HTML(footer_html)
+
+        # Events
+
+        def on_task_change(value):
+            show_summary = (value == "transcript_and_summarize")
+            return gr.update(visible=show_summary)
+
+        task_choice.change(
+            fn=on_task_change,
+            inputs=[task_choice],
+            outputs=[summary_text],
+        )
+
+        def on_transcribe(audio, task, language, num_speakers):
+            if not audio:
+                return (
+                    "",
+                    "",
+                    None,
+                    "Please upload or record audio.",
+                )
+
+            transcript, summary, file_path, msg = run_transcribe(
+                audio_path=audio,
+                task=task,
+                language=language,
+                num_speakers=num_speakers,
+            )
+
+            show_summary = bool(summary)
+            return (
+                transcript,
+                summary,
+                file_path if file_path else None,
+                msg,
+            )
+
+        transcribe_btn.click(
+            fn=on_transcribe,
+            inputs=[
+                audio_input,
+                task_choice,
+                language_input,
+                num_speakers_input,
+            ],
+            outputs=[
+                output_text,
+                summary_text,
+                file_output,
+                status_text,
+            ],
+        )
+
+    # Launch options
+    server_name = launch_cfg.get("server_name", os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"))
+    server_port = launch_cfg.get("server_port", 7860)
+    favicon_path = launch_cfg.get("favicon_path", "/app/src/misc/logo.png")
+
+    app.launch(
+        server_name=str(server_name),
+        server_port=int(server_port),
+        favicon_path=favicon_path if os.path.exists(favicon_path) else None,
+        show_api=False,
+    )
+
+
+# Script entry point: build and launch the Web GUI when this module is
+# executed directly.
+# NOTE(review): this file uses relative imports (`from .autotranscript ...`),
+# so it presumably has to be started with package context
+# (e.g. `python -m <package>.<module>`) — confirm against the Docker entrypoint.
+if __name__ == "__main__":
+    create_app()