From b0f19810d43150f569b50f6d9c1bff0328a59946 Mon Sep 17 00:00:00 2001 From: Admin Date: Sun, 14 Jun 2026 03:44:55 +0000 Subject: [PATCH] Fix: Improve LLM connectivity, add logging, increase timeout, update docs --- .env.example | 3 ++ README.md | 57 +++++++++++++++++++++ mcp_summary_server.py | 112 +++++++++++++++++++++++------------------- 3 files changed, 121 insertions(+), 51 deletions(-) diff --git a/.env.example b/.env.example index d4cfbbe..f975e02 100644 --- a/.env.example +++ b/.env.example @@ -12,6 +12,9 @@ OPENAPI_URL=http://localhost:8080/v1 OPENAPI_API_KEY= MODEL_NAME=gpt-4o +# LLM Call Timeout in seconds (increase for large documents) +LLM_TIMEOUT=120 + # Summarization Configuration # Characters per chunk when splitting long text CHUNK_SIZE=4000 diff --git a/README.md b/README.md index ade9adf..e81fb22 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ cp .env.example .env | OPENAPI_URL | http://localhost:8080/v1 | LLM API endpoint | | OPENAPI_API_KEY | (empty) | LLM API key | | MODEL_NAME | gpt-4o | LLM model to use | +| LLM_TIMEOUT | 120 | LLM call timeout in seconds | | CHUNK_SIZE | 4000 | Characters per chunk | | OVERLAP | 200 | Characters of overlap between chunks | | TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary | @@ -59,6 +60,40 @@ pip install -r requirements.txt python mcp_summary_server.py ``` +## Connecting to OpenWebUI + +### In OpenWebUI Admin Settings + +1. Go to **Admin Settings → External Tools** +2. Click **+ (Add Server)** +3. Set **Type** to **MCP (Streamable HTTP)** +4. Enter your **Server URL** +5. Set **Authentication**: + - **None** if no API key is configured + - **Bearer** if API_KEY is set (provide the key) +6. 
Save + +### Docker Networking + +If running both OpenWebUI and MCP Summary in Docker: + +```bash +# Use host.docker.internal to reach the host machine +docker run -p 8080:8080 \ + -e OPENAPI_URL=http://host.docker.internal:3000/v1 \ + -e OPENAPI_API_KEY=your-key \ + mcp-summary +``` + +If both containers are on the same Docker network, use the container name directly: + +```bash +docker run --network mynetwork -p 8080:8080 \ + -e OPENAPI_URL=http://openwebui-container:8080/v1 \ + -e OPENAPI_API_KEY=your-key \ + mcp-summary +``` + ## MCP Tool ### summarize_document @@ -78,3 +113,25 @@ Summarizes a document, automatically handling chunking for long text. "chunks": 1 // number of chunks used } ``` + +## Troubleshooting + +### "Failed to connect to MCP server" + +1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed. +2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL. +3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM at `OPENAPI_URL`. +4. **Check timeouts**: Increase `LLM_TIMEOUT` if summarization takes too long. + +### Infinite loading screen + +This may occur if you configured the server as OpenAPI instead of MCP. Fix by: + +1. Opening Admin Settings → External Tools +2. Disabling/deleting the problematic connection +3. 
Re-adding with **Type** set to **MCP (Streamable HTTP)** + +### Slow initialization + +If the server takes longer than 10 seconds to initialize: +- Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds) diff --git a/mcp_summary_server.py b/mcp_summary_server.py index 8d53383..29b1b9e 100644 --- a/mcp_summary_server.py +++ b/mcp_summary_server.py @@ -25,9 +25,15 @@ import json import os import sys import re +import logging from http.server import HTTPServer, BaseHTTPRequestHandler from typing import Any, Dict, List, Optional import requests +from requests.exceptions import RequestException + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger("mcp-summary") # MCP Server Configuration API_KEY = os.environ.get("API_KEY", "").strip() @@ -39,11 +45,14 @@ OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "") MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o") # Summarization Configuration -CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000")) # Characters per chunk -OVERLAP = int(os.environ.get("OVERLAP", "200")) # Characters of overlap between chunks -TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150")) # Words -MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100")) # Words for final summary -MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000")) # Characters before chunking +CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000")) +OVERLAP = int(os.environ.get("OVERLAP", "200")) +TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150")) +MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100")) +MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000")) + +# LLM call timeout in seconds - increase for large documents +LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120")) # 
Tool definitions TOOLS_LIST: Dict[str, Any] = { @@ -71,7 +80,7 @@ TOOLS_LIST: Dict[str, Any] = { def call_llm(messages: List[Dict], temperature: float = 0.3) -> str: - """Make an OpenAPI-compatible LLM call.""" + """Make an OpenAPI-compatible LLM call with error handling.""" url = f"{OPENAPI_URL}/chat/completions" headers = { "Content-Type": "application/json", @@ -86,11 +95,20 @@ def call_llm(messages: List[Dict], temperature: float = 0.3) -> str: "top_p": 0.9 } - response = requests.post(url, headers=headers, json=payload, timeout=60) - response.raise_for_status() + try: + logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}") + response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT) + response.raise_for_status() + + data = response.json() + return data["choices"][0]["message"]["content"] - data = response.json() - return data["choices"][0]["message"]["content"] + except RequestException as e: + logger.error(f"LLM request failed: {e}") + raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}") + except Exception as e: + logger.error(f"LLM call failed: {e}") + raise RuntimeError(f"LLM call failed: {str(e)}") def chunk_text(text: str) -> List[str]: @@ -102,31 +120,16 @@ def chunk_text(text: str) -> List[str]: start = 0 while start < len(text): - # Find a good breaking point (after sentence or paragraph) end = min(start + CHUNK_SIZE, len(text)) - # Try to break at sentence boundary - search_end = min(end, len(text)) - break_point = -1 - - # Look for paragraph break first - for marker in ["\n\n", "\n"]: - pos = text.rfind(marker, start + CHUNK_SIZE // 2, search_end) - if pos > 0: + # Try to break at sentence/paragraph boundary + break_point = end + for marker in ["\n\n", "\n", ". ", "! ", "? 
"]: + pos = text.rfind(marker, start + CHUNK_SIZE // 2, end) + if pos > start: break_point = pos break - # If no paragraph break, look for sentence break - if break_point == -1: - for marker in [".", "!", "?"]: - pos = text.rfind(marker, start + CHUNK_SIZE // 2, search_end) - if pos > 0: - break_point = pos - break - - if break_point == -1: - break_point = end - chunk = text[start:break_point] if chunk.strip(): chunks.append(chunk) @@ -135,25 +138,26 @@ def chunk_text(text: str) -> List[str]: if start >= len(text): break + logger.info(f"Split text into {len(chunks)} chunks") return chunks def summarize_chunk(chunk: str, chunk_num: int, total_chunks: int) -> str: """Summarize a single chunk of text.""" - system_prompt = f"""You are a precise legal assistant specializing in creating concise, accurate summaries. + system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries. You are processing chunk {chunk_num} of {total_chunks} from a larger document. -Your task: Create a focused summary of this chunk that: -- Captures the key points and important details +Create a focused summary that: +- Captures key points and important details - Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words -- Can be combined with summaries of other chunks to form a complete picture +- Can be combined with other chunk summaries - Uses clear, professional language -- Preserves important names, dates, and specific facts +- Preserves names, dates, and specific facts -Format your response as plain text without bullet points or special formatting.""" +Respond as plain text without bullet points.""" - user_prompt = f"""Summarize the following text (chunk {chunk_num} of {total_chunks}): + user_prompt = f"""Summarize this text (chunk {chunk_num} of {total_chunks}): {text} @@ -164,6 +168,7 @@ Summary:""" {"role": "user", "content": user_prompt} ] + logger.info(f"Summarizing chunk {chunk_num}/{total_chunks}") return call_llm(messages) @@ -173,17 +178,17 @@ def 
synthesize_summaries(chunk_summaries: List[str]) -> str: system_prompt = """You are a precise legal assistant creating executive-level summaries. -Your task: Synthesize the provided partial summaries into a single, cohesive summary that: +Synthesize the provided partial summaries into a single, cohesive summary that: - Is approximately 100 words -- Captures the complete picture of the document +- Captures the complete document picture - Is clear and professional - Removes redundancy - Maintains logical flow - Preserves all critical information -Format your response as a single paragraph of plain text.""" +Format as a single paragraph of plain text.""" - user_prompt = f"""Synthesize the following partial summaries into one cohesive summary: + user_prompt = f"""Synthesize these partial summaries into one cohesive summary: {combined} @@ -194,6 +199,7 @@ Final summary:""" {"role": "user", "content": user_prompt} ] + logger.info(f"Synthesizing {len(chunk_summaries)} chunk summaries") return call_llm(messages) @@ -206,23 +212,23 @@ def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) - """ original_length = len(text) - # Strip whitespace and validate text = text.strip() if not text: raise ValueError("Empty text provided") + logger.info(f"Summarizing text of {original_length} characters") + # Direct summarization for shorter texts if len(text) <= MAX_DIRECT_TEXT_LENGTH: system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries. 
-Your task: Create a summary that: +Create a summary that: - Is approximately {max_length} words -- Captures the key points and important details +- Captures key points and important details - Uses clear, professional language -- Preserves important names, dates, and specific facts -- Is suitable for a legal professional +- Preserves names, dates, and specific facts -Format your response as plain text without bullet points or special formatting.""" +Format as plain text without bullet points.""" user_prompt = f"""Summarize the following document: @@ -247,13 +253,11 @@ Summary:""" # Chunked summarization for longer texts chunks = chunk_text(text) - # Summarize each chunk chunk_summaries = [] for i, chunk in enumerate(chunks, 1): chunk_summary = summarize_chunk(chunk, i, len(chunks)) chunk_summaries.append(chunk_summary) - # Synthesize into final summary final_summary = synthesize_summaries(chunk_summaries) return { @@ -268,8 +272,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler): """HTTP handler for MCP summary server.""" def log_message(self, format, *args): - # Quiet logs by default - pass + logger.info(format % args) def _send_json(self, status: int, payload: Any): """Send JSON response.""" @@ -304,6 +307,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler): "service": "mcp-summary", "transport": "streamable-http", "model": MODEL_NAME, + "status": "running", "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)." 
}) return @@ -336,6 +340,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler): params = req.get("params") or {} req_id = req.get("id") + logger.info(f"MCP request: method={method}, id={req_id}") + # MCP: initialize if method == "initialize": self._send_json(200, { @@ -380,6 +386,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler): } }) except Exception as e: + logger.error(f"Tool call failed: {e}") self._send_json(200, { "jsonrpc": "2.0", "id": req_id, @@ -410,10 +417,13 @@ def main(): """Start the MCP summary server.""" server = HTTPServer(("0.0.0.0", PORT), MCPSummaryHandler) mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)" + print(f"MCP Summary Server listening on 0.0.0.0:{PORT} [{mode}]") print(f" - Model: {MODEL_NAME}") + print(f" - LLM URL: {OPENAPI_URL}") print(f" - Chunk size: {CHUNK_SIZE} characters") print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters") + print(f" - LLM timeout: {LLM_TIMEOUT} seconds") try: server.serve_forever()