Fix: Improve LLM connectivity, add logging, increase timeout, update docs
This commit is contained in:
@@ -12,6 +12,9 @@ OPENAPI_URL=http://localhost:8080/v1
|
||||
OPENAPI_API_KEY=
|
||||
MODEL_NAME=gpt-4o
|
||||
|
||||
# LLM Call Timeout in seconds (increase for large documents)
|
||||
LLM_TIMEOUT=120
|
||||
|
||||
# Summarization Configuration
|
||||
# Characters per chunk when splitting long text
|
||||
CHUNK_SIZE=4000
|
||||
|
||||
@@ -27,6 +27,7 @@ cp .env.example .env
|
||||
| OPENAPI_URL | http://localhost:8080/v1 | Base URL of the OpenAI-compatible LLM API (the server appends `/chat/completions`) |
|
||||
| OPENAPI_API_KEY | (empty) | LLM API key |
|
||||
| MODEL_NAME | gpt-4o | LLM model to use |
|
||||
| LLM_TIMEOUT | 120 | LLM call timeout in seconds (increase for large documents) |
|
||||
| CHUNK_SIZE | 4000 | Characters per chunk |
|
||||
| OVERLAP | 200 | Characters of overlap between chunks |
|
||||
| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
|
||||
@@ -59,6 +60,40 @@ pip install -r requirements.txt
|
||||
python mcp_summary_server.py
|
||||
```
|
||||
|
||||
## Connecting to OpenWebUI
|
||||
|
||||
### In OpenWebUI Admin Settings
|
||||
|
||||
1. Go to **Admin Settings → External Tools**
|
||||
2. Click **+ (Add Server)**
|
||||
3. Set **Type** to **MCP (Streamable HTTP)**
|
||||
4. Enter your **Server URL**
|
||||
5. Set **Authentication**:
|
||||
- **None** if `API_KEY` is not set on the MCP server
|
||||
- **Bearer** if API_KEY is set (provide the key)
|
||||
6. Save
|
||||
|
||||
### Docker Networking
|
||||
|
||||
If running both OpenWebUI and MCP Summary in Docker:
|
||||
|
||||
```bash
|
||||
# Use host.docker.internal to reach the host machine (on Linux, also pass --add-host=host.docker.internal:host-gateway)
|
||||
docker run -p 8080:8080 \
|
||||
-e OPENAPI_URL=http://host.docker.internal:3000/v1 \
|
||||
-e OPENAPI_API_KEY=your-key \
|
||||
mcp-summary
|
||||
```
|
||||
|
||||
If both containers are on the same Docker network, use the container name directly:
|
||||
|
||||
```bash
|
||||
docker run --network mynetwork -p 8080:8080 \
|
||||
-e OPENAPI_URL=http://openwebui-container:8080/v1 \
|
||||
-e OPENAPI_API_KEY=your-key \
|
||||
mcp-summary
|
||||
```
|
||||
|
||||
## MCP Tool
|
||||
|
||||
### summarize_document
|
||||
@@ -78,3 +113,25 @@ Summarizes a document, automatically handling chunking for long text.
|
||||
"chunks": 1 // number of chunks used
|
||||
}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "Failed to connect to MCP server"
|
||||
|
||||
1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed.
|
||||
2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL.
|
||||
3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM endpoint configured in `OPENAPI_URL`.
|
||||
4. **Check timeouts**: Increase `LLM_TIMEOUT` (default: 120 seconds) if summarizing large documents times out.
|
||||
|
||||
### Infinite loading screen
|
||||
|
||||
This may occur if you configured the server as OpenAPI instead of MCP. Fix by:
|
||||
|
||||
1. Opening Admin Settings → External Tools
|
||||
2. Disabling/deleting the problematic connection
|
||||
3. Re-adding with **Type** set to **MCP (Streamable HTTP)**
|
||||
|
||||
### Slow initialization
|
||||
|
||||
If the server takes longer than 10 seconds to initialize:
|
||||
- Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds)
|
||||
|
||||
+59
-49
@@ -25,9 +25,15 @@ import json
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import logging
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from typing import Any, Dict, List, Optional
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("mcp-summary")
|
||||
|
||||
# MCP Server Configuration
|
||||
API_KEY = os.environ.get("API_KEY", "").strip()
|
||||
@@ -39,11 +45,14 @@ OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
|
||||
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
|
||||
|
||||
# Summarization Configuration
|
||||
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000")) # Characters per chunk
|
||||
OVERLAP = int(os.environ.get("OVERLAP", "200")) # Characters of overlap between chunks
|
||||
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150")) # Words
|
||||
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100")) # Words for final summary
|
||||
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000")) # Characters before chunking
|
||||
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
|
||||
OVERLAP = int(os.environ.get("OVERLAP", "200"))
|
||||
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
|
||||
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))
|
||||
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
|
||||
|
||||
# LLM call timeout in seconds - increase for large documents
|
||||
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
|
||||
|
||||
# Tool definitions
|
||||
TOOLS_LIST: Dict[str, Any] = {
|
||||
@@ -71,7 +80,7 @@ TOOLS_LIST: Dict[str, Any] = {
|
||||
|
||||
|
||||
def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
|
||||
"""Make an OpenAPI-compatible LLM call."""
|
||||
"""Make an OpenAPI-compatible LLM call with error handling."""
|
||||
url = f"{OPENAPI_URL}/chat/completions"
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
@@ -86,12 +95,21 @@ def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
|
||||
"top_p": 0.9
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=60)
|
||||
try:
|
||||
logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}")
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT)
|
||||
response.raise_for_status()
|
||||
|
||||
data = response.json()
|
||||
return data["choices"][0]["message"]["content"]
|
||||
|
||||
except RequestException as e:
|
||||
logger.error(f"LLM request failed: {e}")
|
||||
raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}")
|
||||
except Exception as e:
|
||||
logger.error(f"LLM call failed: {e}")
|
||||
raise RuntimeError(f"LLM call failed: {str(e)}")
|
||||
|
||||
|
||||
def chunk_text(text: str) -> List[str]:
|
||||
"""Split text into chunks with overlap for summarization."""
|
||||
@@ -102,30 +120,15 @@ def chunk_text(text: str) -> List[str]:
|
||||
start = 0
|
||||
|
||||
while start < len(text):
|
||||
# Find a good breaking point (after sentence or paragraph)
|
||||
end = min(start + CHUNK_SIZE, len(text))
|
||||
|
||||
# Try to break at sentence boundary
|
||||
search_end = min(end, len(text))
|
||||
break_point = -1
|
||||
|
||||
# Look for paragraph break first
|
||||
for marker in ["\n\n", "\n"]:
|
||||
pos = text.rfind(marker, start + CHUNK_SIZE // 2, search_end)
|
||||
if pos > 0:
|
||||
break_point = pos
|
||||
break
|
||||
|
||||
# If no paragraph break, look for sentence break
|
||||
if break_point == -1:
|
||||
for marker in [".", "!", "?"]:
|
||||
pos = text.rfind(marker, start + CHUNK_SIZE // 2, search_end)
|
||||
if pos > 0:
|
||||
break_point = pos
|
||||
break
|
||||
|
||||
if break_point == -1:
|
||||
# Try to break at sentence/paragraph boundary
|
||||
break_point = end
|
||||
for marker in ["\n\n", "\n", ". ", "! ", "? "]:
|
||||
pos = text.rfind(marker, start + CHUNK_SIZE // 2, end)
|
||||
if pos > start:
|
||||
break_point = pos
|
||||
break
|
||||
|
||||
chunk = text[start:break_point]
|
||||
if chunk.strip():
|
||||
@@ -135,25 +138,26 @@ def chunk_text(text: str) -> List[str]:
|
||||
if start >= len(text):
|
||||
break
|
||||
|
||||
logger.info(f"Split text into {len(chunks)} chunks")
|
||||
return chunks
|
||||
|
||||
|
||||
def summarize_chunk(chunk: str, chunk_num: int, total_chunks: int) -> str:
|
||||
"""Summarize a single chunk of text."""
|
||||
system_prompt = f"""You are a precise legal assistant specializing in creating concise, accurate summaries.
|
||||
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
||||
|
||||
You are processing chunk {chunk_num} of {total_chunks} from a larger document.
|
||||
|
||||
Your task: Create a focused summary of this chunk that:
|
||||
- Captures the key points and important details
|
||||
Create a focused summary that:
|
||||
- Captures key points and important details
|
||||
- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
|
||||
- Can be combined with summaries of other chunks to form a complete picture
|
||||
- Can be combined with other chunk summaries
|
||||
- Uses clear, professional language
|
||||
- Preserves important names, dates, and specific facts
|
||||
- Preserves names, dates, and specific facts
|
||||
|
||||
Format your response as plain text without bullet points or special formatting."""
|
||||
Respond as plain text without bullet points."""
|
||||
|
||||
user_prompt = f"""Summarize the following text (chunk {chunk_num} of {total_chunks}):
|
||||
user_prompt = f"""Summarize this text (chunk {chunk_num} of {total_chunks}):
|
||||
|
||||
{text}
|
||||
|
||||
@@ -164,6 +168,7 @@ Summary:"""
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
logger.info(f"Summarizing chunk {chunk_num}/{total_chunks}")
|
||||
return call_llm(messages)
|
||||
|
||||
|
||||
@@ -173,17 +178,17 @@ def synthesize_summaries(chunk_summaries: List[str]) -> str:
|
||||
|
||||
system_prompt = """You are a precise legal assistant creating executive-level summaries.
|
||||
|
||||
Your task: Synthesize the provided partial summaries into a single, cohesive summary that:
|
||||
Synthesize the provided partial summaries into a single, cohesive summary that:
|
||||
- Is approximately 100 words
|
||||
- Captures the complete picture of the document
|
||||
- Captures the complete document picture
|
||||
- Is clear and professional
|
||||
- Removes redundancy
|
||||
- Maintains logical flow
|
||||
- Preserves all critical information
|
||||
|
||||
Format your response as a single paragraph of plain text."""
|
||||
Format as a single paragraph of plain text."""
|
||||
|
||||
user_prompt = f"""Synthesize the following partial summaries into one cohesive summary:
|
||||
user_prompt = f"""Synthesize these partial summaries into one cohesive summary:
|
||||
|
||||
{combined}
|
||||
|
||||
@@ -194,6 +199,7 @@ Final summary:"""
|
||||
{"role": "user", "content": user_prompt}
|
||||
]
|
||||
|
||||
logger.info(f"Synthesizing {len(chunk_summaries)} chunk summaries")
|
||||
return call_llm(messages)
|
||||
|
||||
|
||||
@@ -206,23 +212,23 @@ def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -
|
||||
"""
|
||||
original_length = len(text)
|
||||
|
||||
# Strip whitespace and validate
|
||||
text = text.strip()
|
||||
if not text:
|
||||
raise ValueError("Empty text provided")
|
||||
|
||||
logger.info(f"Summarizing text of {original_length} characters")
|
||||
|
||||
# Direct summarization for shorter texts
|
||||
if len(text) <= MAX_DIRECT_TEXT_LENGTH:
|
||||
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
||||
|
||||
Your task: Create a summary that:
|
||||
Create a summary that:
|
||||
- Is approximately {max_length} words
|
||||
- Captures the key points and important details
|
||||
- Captures key points and important details
|
||||
- Uses clear, professional language
|
||||
- Preserves important names, dates, and specific facts
|
||||
- Is suitable for a legal professional
|
||||
- Preserves names, dates, and specific facts
|
||||
|
||||
Format your response as plain text without bullet points or special formatting."""
|
||||
Format as plain text without bullet points."""
|
||||
|
||||
user_prompt = f"""Summarize the following document:
|
||||
|
||||
@@ -247,13 +253,11 @@ Summary:"""
|
||||
# Chunked summarization for longer texts
|
||||
chunks = chunk_text(text)
|
||||
|
||||
# Summarize each chunk
|
||||
chunk_summaries = []
|
||||
for i, chunk in enumerate(chunks, 1):
|
||||
chunk_summary = summarize_chunk(chunk, i, len(chunks))
|
||||
chunk_summaries.append(chunk_summary)
|
||||
|
||||
# Synthesize into final summary
|
||||
final_summary = synthesize_summaries(chunk_summaries)
|
||||
|
||||
return {
|
||||
@@ -268,8 +272,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
||||
"""HTTP handler for MCP summary server."""
|
||||
|
||||
def log_message(self, format, *args):
|
||||
# Quiet logs by default
|
||||
pass
|
||||
logger.info(format % args)
|
||||
|
||||
def _send_json(self, status: int, payload: Any):
|
||||
"""Send JSON response."""
|
||||
@@ -304,6 +307,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
||||
"service": "mcp-summary",
|
||||
"transport": "streamable-http",
|
||||
"model": MODEL_NAME,
|
||||
"status": "running",
|
||||
"docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
|
||||
})
|
||||
return
|
||||
@@ -336,6 +340,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
||||
params = req.get("params") or {}
|
||||
req_id = req.get("id")
|
||||
|
||||
logger.info(f"MCP request: method={method}, id={req_id}")
|
||||
|
||||
# MCP: initialize
|
||||
if method == "initialize":
|
||||
self._send_json(200, {
|
||||
@@ -380,6 +386,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
||||
}
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Tool call failed: {e}")
|
||||
self._send_json(200, {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
@@ -410,10 +417,13 @@ def main():
|
||||
"""Start the MCP summary server."""
|
||||
server = HTTPServer(("0.0.0.0", PORT), MCPSummaryHandler)
|
||||
mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
|
||||
|
||||
print(f"MCP Summary Server listening on 0.0.0.0:{PORT} [{mode}]")
|
||||
print(f" - Model: {MODEL_NAME}")
|
||||
print(f" - LLM URL: {OPENAPI_URL}")
|
||||
print(f" - Chunk size: {CHUNK_SIZE} characters")
|
||||
print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")
|
||||
print(f" - LLM timeout: {LLM_TIMEOUT} seconds")
|
||||
|
||||
try:
|
||||
server.serve_forever()
|
||||
|
||||
Reference in New Issue
Block a user