Ensure Docker container always starts Web GUI (not CLI)

2026-06-14 05:11:38 +00:00
parent d854d498cd
commit 37d30e0ee2
4 changed files with 292 additions and 2 deletions
@@ -44,5 +44,6 @@ COPY misc /app/src/misc
 # Expose port (for Web GUI)
 EXPOSE 7860
-# Run the application
+# Run the Web GUI by default (never the CLI)
-ENTRYPOINT ["python3", "-m", "scraibe.cli"]
+# Use `python3 -m scraibe` so that __main__.py always launches the Web GUI.
 CMD ["python3", "-m", "scraibe"]
@@ -1,3 +1,5 @@
 tqdm>=4.66.5
 numpy>=1.26.4
 httpx>=0.28.0
 gradio>=5.0.0
 PyYAML>=6.0
@@ -0,0 +1,12 @@
 """
 Entrypoint for running ScrAIbe as a module:
    python -m scraibe
 Always launches the Web GUI (Gradio), never the CLI.
 """
 from .webui import create_app
 # Launch the Web GUI only when this module is executed (e.g. via
 # `python -m scraibe`), not when it is merely imported.
 if __name__ == "__main__":
    create_app()
@@ -0,0 +1,275 @@
 """
 ScrAIbe Web GUI (Gradio)
 ------------------------
 Runs the Web GUI that:
 - Accepts audio uploads
 - Sends audio to LocalAI for transcription + diarization
 - Optionally sends transcript to a second LLM for summarization
 - Returns transcript (and summary) in the browser
 This is the default entrypoint when running in Docker.
 """
 import os
 import logging
 import tempfile
 import gradio as gr
 from .autotranscript import Scraibe
 from .misc import setup_logging
 logger = logging.getLogger("scraibe.webui")
 def load_config():
    """
    Load the optional YAML configuration for the Web GUI.

    The file path is read from the ``SCRAIBE_CONFIG`` environment variable
    and defaults to ``/app/src/misc/config.yaml``. Loading is best-effort:
    a missing file yields an empty dict silently, while an unreadable file,
    a parse error, a missing PyYAML install or a document whose top level is
    not a mapping yields an empty dict and a logged warning.

    Returns:
        dict: The parsed configuration mapping, or ``{}`` when nothing
        usable could be loaded.
    """
    config_path = os.getenv("SCRAIBE_CONFIG", "/app/src/misc/config.yaml")
    if not os.path.exists(config_path):
        return {}
    try:
        # Imported inside the try so that a missing PyYAML only degrades to
        # an empty config instead of preventing the UI from starting.
        import yaml
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}
    except Exception as e:
        logger.warning("Failed to load config from %s: %s", config_path, e)
        return {}
    # safe_load returns whatever the document contains (list, str, ...).
    # Callers use ``config.get(...)``, so anything but a mapping is unusable.
    if not isinstance(config, dict):
        logger.warning(
            "Ignoring config from %s: expected a mapping at top level, got %s",
            config_path,
            type(config).__name__,
        )
        return {}
    return config
 def create_app():
    """
    Create and launch the Gradio Web GUI.

    Steps performed:
    - configure logging from the ``LOG_LEVEL`` environment variable
      (default ``INFO``),
    - load the optional configuration via ``load_config()`` and read its
      ``layout`` and ``launch`` sections,
    - try to create the ``Scraibe`` backend; on failure the UI is still
      started and creation is retried on every transcription request,
    - build the Blocks interface (upload, task choice, language, number of
      speakers, transcript/summary/download/status outputs),
    - call ``app.launch()`` with the configured host, port and favicon.

    Returns:
        None. The call to ``app.launch()`` is the last statement.
    """
    # Logging
    log_level = os.getenv("LOG_LEVEL", "INFO")
    setup_logging(level=log_level)
    # Load config (branding, layout, etc.)
    config = load_config()
    # ``or {}`` also covers a section that is present but null in the YAML,
    # which ``config.get(key, {})`` would hand back as None.
    layout_cfg = config.get("layout") or {}
    launch_cfg = config.get("launch") or {}
    logger.info("Starting ScrAIbe Web GUI.")
    # Initialize Scraibe (LocalAI-backed)
    # If LocalAI is unreachable at startup, still launch the UI
    # and let individual transcription calls fail with a clear message.
    scraibe = None
    try:
        scraibe = Scraibe(verbose=True)
    except Exception as e:
        logger.warning(
            "Failed to initialize Scraibe at startup (LocalAI may be down): %s. "
            "Web GUI will start; transcription will fail until LocalAI is reachable.",
            e,
        )

    # Helper: return the backend, creating it on demand if startup failed.
    def get_scraibe():
        nonlocal scraibe
        if scraibe is None:
            # Retry so the UI recovers without a restart once the backend
            # becomes reachable; a failure here propagates to the caller.
            scraibe = Scraibe(verbose=True)
        return scraibe

    # Helper: write the given text parts to a new temp file and return its path.
    def save_download(parts, suffix):
        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated tempfile.mktemp which only returns a guessable name.
        # delete=False keeps the file on disk so it can be offered for
        # download after this function returns.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=suffix,
            delete=False,
            encoding="utf-8",
        ) as f:
            f.writelines(parts)
        return f.name

    # Helper: run transcription via LocalAI API
    def run_transcribe(audio_path, task, language, num_speakers):
        """
        Run the selected task and return
        ``(transcript, summary, download_path, status_message)``.

        Raises ValueError when no audio path is given; any other failure is
        logged and reported through the status message instead.
        """
        if not audio_path:
            raise ValueError("No audio file provided.")
        try:
            backend = get_scraibe()
            options = dict(
                audio_file=audio_path,
                language=language or None,
                num_speakers=int(num_speakers) if num_speakers else None,
                verbose=True,
            )
            if task == "transcript_and_summarize":
                result = backend.transcript_and_summarize(**options)
                transcript_text = result.get("transcript", "")
                summary = result.get("summary", "")
                # Save as .md
                md_path = save_download(
                    [
                        "# Transcript\n\n",
                        transcript_text,
                        "\n\n# Summary\n\n",
                        summary,
                    ],
                    ".md",
                )
                return (
                    transcript_text,
                    summary,
                    md_path,
                    "Transcription and summarization completed.",
                )
            # Default: transcribe only
            text = backend.transcribe(**options)
            # Save as .txt
            txt_path = save_download([text], ".txt")
            return (
                text,
                "",
                txt_path,
                "Transcription completed.",
            )
        except Exception as e:
            # logger.exception keeps the traceback in the log; the user only
            # sees the short message in the status box.
            logger.exception("Error during transcription: %s", e)
            return (
                "",
                "",
                None,
                f"Error: {e}",
            )

    # Helper: return the content of an optional HTML fragment, or "".
    def read_optional_html(path):
        if path and os.path.exists(path):
            with open(path, "r", encoding="utf-8") as f:
                return f.read()
        return ""

    # Load header/footer HTML if present
    header_html = read_optional_html(
        layout_cfg.get("header", "/app/src/misc/header.html")
    )
    footer_html = read_optional_html(
        layout_cfg.get("footer", "/app/src/misc/footer.html")
    )
    # Build Gradio interface
    with gr.Blocks(
        title="A.P.Strom Transcription",
        css="body { font-family: Arial, sans-serif; }",
    ) as app:
        # Header
        if header_html:
            gr.HTML(header_html)
        with gr.Row():
            with gr.Column(scale=2):
                audio_input = gr.Audio(
                    label="Upload or record audio",
                    type="filepath",
                )
                with gr.Row():
                    task_choice = gr.Radio(
                        choices=[
                            ("Transcribe", "transcribe"),
                            ("Transcript & Summarize", "transcript_and_summarize"),
                        ],
                        value="transcribe",
                        label="Task",
                    )
                with gr.Row():
                    language_input = gr.Textbox(
                        label="Language (optional)",
                        placeholder="e.g., english, german",
                    )
                    num_speakers_input = gr.Number(
                        label="Number of speakers (optional)",
                        precision=0,
                    )
                transcribe_btn = gr.Button("Start", variant="primary")
            with gr.Column(scale=3):
                output_text = gr.Textbox(
                    label="Transcript",
                    lines=10,
                    interactive=False,
                )
                # Hidden until the summarize task is selected (see
                # on_task_change below).
                summary_text = gr.Textbox(
                    label="Summary",
                    lines=10,
                    interactive=False,
                    visible=False,
                )
                file_output = gr.File(
                    label="Download transcript/summary",
                )
                status_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                )
        # Footer
        if footer_html:
            gr.HTML(footer_html)

        # Events
        def on_task_change(value):
            # Show the summary box only for the summarize task.
            show_summary = (value == "transcript_and_summarize")
            return gr.update(visible=show_summary)

        task_choice.change(
            fn=on_task_change,
            inputs=[task_choice],
            outputs=[summary_text],
        )

        def on_transcribe(audio, task, language, num_speakers):
            # Guard here so run_transcribe's ValueError never reaches the UI.
            if not audio:
                return (
                    "",
                    "",
                    None,
                    "Please upload or record audio.",
                )
            transcript, summary, file_path, msg = run_transcribe(
                audio_path=audio,
                task=task,
                language=language,
                num_speakers=num_speakers,
            )
            return (
                transcript,
                summary,
                file_path or None,
                msg,
            )

        transcribe_btn.click(
            fn=on_transcribe,
            inputs=[
                audio_input,
                task_choice,
                language_input,
                num_speakers_input,
            ],
            outputs=[
                output_text,
                summary_text,
                file_output,
                status_text,
            ],
        )
    # Launch options
    server_name = launch_cfg.get("server_name", os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"))
    server_port = launch_cfg.get("server_port", 7860)
    favicon_path = launch_cfg.get("favicon_path", "/app/src/misc/logo.png")
    # A null favicon_path in the config would make os.path.exists raise,
    # so check for a value before touching the filesystem.
    has_favicon = bool(favicon_path) and os.path.exists(favicon_path)
    app.launch(
        server_name=str(server_name),
        server_port=int(server_port),
        favicon_path=favicon_path if has_favicon else None,
        show_api=False,
    )
 # Also allow launching the Web GUI by executing this module itself;
 # importing it has no side effect beyond the definitions above.
 if __name__ == "__main__":
    create_app()