Compare commits

..

5 Commits

7 changed files with 355 additions and 173 deletions
+32
View File
@@ -0,0 +1,32 @@
# MCP Summary Server - Environment Variables
# Server Configuration
PORT=8080
# Authentication (optional)
# If set, requests must include: Authorization: Bearer <API_KEY>
API_KEY=
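# LLM Configuration
# OPENAPI_URL is the base URL of the LLM API; the server appends /chat/completions to it.
# Note: the default below uses the same port (8080) as PORT above, so change one
# of them when the LLM endpoint and this server run on the same host.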
OPENAPI_URL=http://localhost:8080/v1
OPENAPI_API_KEY=
MODEL_NAME=gpt-4o
# LLM Call Timeout in seconds (increase for large documents)
LLM_TIMEOUT=120
# Summarization Configuration
# Characters per chunk when splitting long text
CHUNK_SIZE=4000
# Characters of overlap between chunks to maintain context
OVERLAP=200
# Target length for intermediate chunk summaries (words)
TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
# Maximum length for final synthesized summary (words)
MAX_DIRECT_SUMMARY_LENGTH=100
# Maximum text length (characters) before chunking is triggered
MAX_DIRECT_TEXT_LENGTH=8000
+37
View File
@@ -0,0 +1,37 @@
# Dockerfile for MCP Summary Server
#
# Usage (from the directory containing this Dockerfile, requirements.txt
# and mcp_summary_server.py):
#
# docker build -t mcp-summary .
# docker run -p 8080:8080 --env-file .env mcp-summary
#
FROM python:3.12-slim
WORKDIR /app
# Install runtime dependencies first so this layer is cached across rebuilds
# that only change the server script.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
# Copy the server script
COPY mcp_summary_server.py /app/mcp_summary_server.py
# Run as an unprivileged user: the server only listens on a TCP port and makes
# outbound HTTP calls, so it does not need root inside the container.
RUN useradd --system --no-create-home appuser
USER appuser
# Expose HTTP port (matches the default PORT below)
EXPOSE 8080
# Environment variables (defaults; override with -e or --env-file at run time)
ENV PORT=8080
# NOTE: inside the container, localhost refers to the container itself, so this
# default normally has to be overridden to point at the real LLM endpoint.
ENV OPENAPI_URL=http://localhost:8080/v1
ENV OPENAPI_API_KEY=
ENV MODEL_NAME=gpt-4o
ENV CHUNK_SIZE=4000
ENV OVERLAP=200
ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
ENV MAX_DIRECT_SUMMARY_LENGTH=100
ENV MAX_DIRECT_TEXT_LENGTH=8000
ENV LLM_TIMEOUT=120
# Empty API_KEY means the server accepts unauthenticated requests.
ENV API_KEY=
# Start the MCP summary server (-u: unbuffered output so logs appear immediately)
ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
+137
View File
@@ -0,0 +1,137 @@
# MCP Summary Server
An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
## Features
- Automatically determines whether to summarize directly or use chunked summarization
- All processing happens server-side
- Returns only the summary to the client
- Configurable chunking parameters
- Bearer token authentication (optional)
## Setup
### Environment Variables
Copy `.env.example` to `.env` and configure:
```bash
cp .env.example .env
```
| Variable | Default | Description |
|----------|---------|-------------|
| PORT | 8080 | HTTP server port |
| API_KEY | (empty) | Bearer token for authentication (optional; authentication is disabled when empty) |
| OPENAPI_URL | http://localhost:8080/v1 | LLM API endpoint |
| OPENAPI_API_KEY | (empty) | LLM API key |
| MODEL_NAME | gpt-4o | LLM model to use |
| LLM_TIMEOUT | 120 | LLM call timeout in seconds |
| CHUNK_SIZE | 4000 | Characters per chunk |
| OVERLAP | 200 | Characters of overlap between chunks |
| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
| MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length (words) |
| MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length (characters) before chunking is triggered |
## Running
### Docker
```bash
# Build
docker build -t mcp-summary .
# Run with environment file
docker run -p 8080:8080 --env-file .env mcp-summary
# Run with inline environment variables
docker run -p 8080:8080 \
-e OPENAPI_URL=http://localhost:8080/v1 \
-e OPENAPI_API_KEY=your-key \
-e MODEL_NAME=gpt-4o \
mcp-summary
```
### Python
```bash
pip install -r requirements.txt
python mcp_summary_server.py
```
## Connecting to OpenWebUI
### In OpenWebUI Admin Settings
1. Go to **Admin Settings → External Tools**
2. Click **+ (Add Server)**
3. Set **Type** to **MCP (Streamable HTTP)**
4. Enter your **Server URL**
5. Set **Authentication**:
- **None** if no API key is configured
- **Bearer** if API_KEY is set (provide the key)
6. Save
### Docker Networking
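If running both OpenWebUI and MCP Summary in Docker, remember that `localhost` inside a container refers to the container itself, so `OPENAPI_URL` must point at an address the container can actually reach. To reach a service published on the host machine: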
```bash
# Use host.docker.internal to reach host machine
docker run -p 8080:8080 \
-e OPENAPI_URL=http://host.docker.internal:3000/v1 \
-e OPENAPI_API_KEY=your-key \
mcp-summary
```
If both containers are on the same Docker network, use the container name directly:
```bash
docker run --network mynetwork -p 8080:8080 \
-e OPENAPI_URL=http://openwebui-container:8080/v1 \
-e OPENAPI_API_KEY=your-key \
mcp-summary
```
## MCP Tool
### summarize_document
Summarizes a document, automatically handling chunking for long text.
**Parameters:**
- `text` (string, required): The document text to summarize
- `max_length` (integer, optional): Maximum summary length in words (default: 100)
**Returns:**
```json
{
"summary": "The summarized text...",
"original_length": 12345,
"method": "direct",
"chunks": 1
}
```
`method` is either `"direct"` or `"chunked"`; `chunks` is the number of chunks used.
## Troubleshooting
### "Failed to connect to MCP server"
1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed.
2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL
3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM at OPENAPI_URL
4. **Check timeouts**: Increase LLM_TIMEOUT if summarization takes too long
### Infinite loading screen
This may occur if you configured the server as OpenAPI instead of MCP. Fix by:
1. Opening Admin Settings → External Tools
2. Disabling/deleting the problematic connection
3. Re-adding with **Type** set to **MCP (Streamable HTTP)**
### Slow initialization
If the server takes longer than 10 seconds to initialize:
- Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds)
Binary file not shown.
+34
View File
@@ -0,0 +1,34 @@
#!/bin/bash
# Diagnostic script for MCP Summary Server
#
# Runs five read-only checks against an MCP Summary Server and prints the
# results: server process, listening port, HTTP GET /, MCP "initialize",
# and MCP "tools/list".
#
# Optional environment overrides:
#   MCP_HOST  host to probe (default: localhost)
#   PORT      port to probe (default: 8080)
#   API_KEY   if set, sent as "Authorization: Bearer <API_KEY>"

HOST="${MCP_HOST:-localhost}"
PORT="${PORT:-8080}"
BASE_URL="http://${HOST}:${PORT}"

# Only send an Authorization header when a key is configured.
AUTH_HEADER=()
if [ -n "${API_KEY:-}" ]; then
    AUTH_HEADER=(-H "Authorization: Bearer ${API_KEY}")
fi

# Pretty-print $1 with jq when possible; otherwise (jq missing, or the body is
# not valid JSON) print it unchanged so the response is never lost.
print_json() {
    if command -v jq >/dev/null 2>&1 && printf '%s' "$1" | jq . 2>/dev/null; then
        return 0
    fi
    printf '%s\n' "$1"
}

# POST the JSON-RPC payload in $1 to the server and print the response,
# or an explicit failure message when the request itself fails.
mcp_post() {
    local payload="$1" response
    if response="$(curl -s --max-time 10 -X POST "${BASE_URL}/" \
            -H "Content-Type: application/json" \
            "${AUTH_HEADER[@]}" \
            -d "$payload")"; then
        print_json "$response"
    else
        echo "Request to ${BASE_URL}/ failed"
    fi
}

# Print the listening TCP sockets using whichever tool is installed.
list_listeners() {
    if command -v netstat >/dev/null 2>&1; then
        netstat -tlnp 2>/dev/null
    elif command -v ss >/dev/null 2>&1; then
        ss -tln 2>/dev/null
    else
        return 1
    fi
}

echo "================================"
echo "MCP Summary Server Diagnostics"
echo "================================"
# Check if server is running
echo -e "\n1. Checking if server process is running..."
# The [m] bracket keeps grep from matching its own command line, which would
# otherwise make this check succeed even when the server is not running.
ps aux | grep '[m]cp_summary_server' || echo "Server process not found"
# Check if port is listening
echo -e "\n2. Checking if port is listening..."
if listeners="$(list_listeners)"; then
    # Anchor on ":PORT" followed by whitespace so that other ports (e.g. 18080)
    # or PIDs containing the same digits do not count as a match.
    echo "$listeners" | grep -E ":${PORT}[[:space:]]" || echo "Port ${PORT} not listening"
else
    echo "Neither netstat nor ss is available; skipping port check"
fi
# Test basic connectivity
echo -e "\n3. Testing basic connectivity..."
curl -s --max-time 10 "${AUTH_HEADER[@]}" "${BASE_URL}/" || echo "Cannot connect to ${HOST}:${PORT}"
# Test MCP initialize
echo -e "\n4. Testing MCP initialize..."
mcp_post '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-11-25","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}'
# Test tools list
echo -e "\n5. Testing tools list..."
mcp_post '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}'
echo -e "\n================================"
echo "Diagnostics complete"
echo "================================"
+94 -156
View File
@@ -24,32 +24,11 @@ Auth:
import json import json
import os import os
import sys import sys
import logging
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from typing import Any, Dict, List, Optional from typing import Any, Dict, Optional
import requests import requests
from requests.exceptions import RequestException
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger("mcp-summary")
# MCP Server Configuration
API_KEY = os.environ.get("API_KEY", "").strip() API_KEY = os.environ.get("API_KEY", "").strip()
PORT = int(os.environ.get("PORT", "8080"))
# LLM Configuration
OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
# Summarization Configuration
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
OVERLAP = int(os.environ.get("OVERLAP", "200"))
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
# Tool definitions # Tool definitions
TOOLS_LIST: Dict[str, Any] = { TOOLS_LIST: Dict[str, Any] = {
@@ -85,7 +64,8 @@ def get_bearer_token(headers: Any) -> Optional[str]:
def require_auth(headers: Any) -> bool: def require_auth(headers: Any) -> bool:
"""Check authentication. Returns True if auth passes or is not required.""" """Check authentication if API key is configured."""
# If API_KEY is not set, allow unauthenticated access
if not API_KEY: if not API_KEY:
return True return True
@@ -95,52 +75,55 @@ def require_auth(headers: Any) -> bool:
return True return True
def call_llm(messages: List[Dict], temperature: float = 0.3) -> str: def call_llm(text: str, system_prompt: str, max_tokens: int = 2000) -> str:
"""Make an OpenAPI-compatible LLM call with error handling.""" """Make an OpenAPI-compatible LLM call."""
url = f"{OPENAPI_URL}/chat/completions" openapi_url = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
openapi_api_key = os.environ.get("OPENAPI_API_KEY", "")
model_name = os.environ.get("MODEL_NAME", "gpt-4o")
timeout = int(os.environ.get("LLM_TIMEOUT", "120"))
url = f"{openapi_url}/chat/completions"
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer {OPENAPI_API_KEY}" "Authorization": f"Bearer {openapi_api_key}"
} }
payload = { payload = {
"model": MODEL_NAME, "model": model_name,
"messages": messages, "messages": [
"temperature": temperature, {"role": "system", "content": system_prompt},
"max_tokens": 2000, {"role": "user", "content": text}
],
"temperature": 0.3,
"max_tokens": max_tokens,
"top_p": 0.9 "top_p": 0.9
} }
try: response = requests.post(url, headers=headers, json=payload, timeout=timeout)
logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}") response.raise_for_status()
response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT)
response.raise_for_status()
data = response.json() data = response.json()
return data["choices"][0]["message"]["content"] return data["choices"][0]["message"]["content"]
except RequestException as e:
logger.error(f"LLM request failed: {e}")
raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}")
except Exception as e:
logger.error(f"LLM call failed: {e}")
raise RuntimeError(f"LLM call failed: {str(e)}")
def chunk_text(text: str) -> List[str]: def chunk_text(text: str) -> list:
"""Split text into chunks with overlap for summarization.""" """Split text into chunks with overlap for summarization."""
if len(text) <= CHUNK_SIZE: chunk_size = int(os.environ.get("CHUNK_SIZE", "4000"))
overlap = int(os.environ.get("OVERLAP", "200"))
if len(text) <= chunk_size:
return [text] return [text]
chunks = [] chunks = []
start = 0 start = 0
while start < len(text): while start < len(text):
end = min(start + CHUNK_SIZE, len(text)) end = min(start + chunk_size, len(text))
# Try to break at sentence/paragraph boundary
break_point = end break_point = end
for marker in ["\n\n", "\n", ". ", "! ", "? "]: for marker in ["\n\n", "\n", ". ", "! ", "? "]:
pos = text.rfind(marker, start + CHUNK_SIZE // 2, end) pos = text.rfind(marker, start + chunk_size // 2, end)
if pos > start: if pos > start:
break_point = pos break_point = pos
break break
@@ -149,46 +132,84 @@ def chunk_text(text: str) -> List[str]:
if chunk.strip(): if chunk.strip():
chunks.append(chunk) chunks.append(chunk)
start = break_point - OVERLAP if break_point < len(text) else len(text) start = break_point - overlap if break_point < len(text) else len(text)
if start >= len(text): if start >= len(text):
break break
logger.info(f"Split text into {len(chunks)} chunks")
return chunks return chunks
def summarize_chunk(chunk_text: str, chunk_num: int, total_chunks: int) -> str: def summarize_document(text: str, max_length: int = 100) -> dict:
"""Summarize a single chunk of text.""" """
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries. Main summarization function.
You are processing chunk {chunk_num} of {total_chunks} from a larger document. - If text is short, summarize directly
- If text is long, chunk and summarize each chunk, then synthesize
"""
original_length = len(text)
text = text.strip()
if not text:
raise ValueError("Empty text provided")
max_direct_length = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
intermediate_length = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
# Direct summarization for shorter texts
if len(text) <= max_direct_length:
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
Create a summary that:
- Is approximately {max_length} words
- Captures key points and important details
- Uses clear, professional language
- Preserves names, dates, and specific facts
Format as plain text without bullet points."""
user_prompt = f"""Summarize the following document:
{text}
Summary:"""
summary = call_llm(user_prompt, system_prompt)
return {
"summary": summary,
"original_length": original_length,
"method": "direct",
"chunks": 1
}
# Chunked summarization for longer texts
chunks = chunk_text(text)
chunk_summaries = []
for i, chunk in enumerate(chunks, 1):
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
You are processing chunk {i} of {len(chunks)} from a larger document.
Create a focused summary that: Create a focused summary that:
- Captures key points and important details - Captures key points and important details
- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words - Is approximately {intermediate_length} words
- Can be combined with other chunk summaries - Can be combined with other chunk summaries
- Uses clear, professional language - Uses clear, professional language
- Preserves names, dates, and specific facts - Preserves names, dates, and specific facts
Respond as plain text without bullet points.""" Respond as plain text without bullet points."""
user_prompt = f"""Summarize this text (chunk {chunk_num} of {total_chunks}): user_prompt = f"""Summarize this text (chunk {i} of {len(chunks)}):
{chunk_text} {chunk}
Summary:""" Summary:"""
messages = [ chunk_summary = call_llm(user_prompt, system_prompt)
{"role": "system", "content": system_prompt}, chunk_summaries.append(chunk_summary)
{"role": "user", "content": user_prompt}
]
logger.info(f"Summarizing chunk {chunk_num}/{total_chunks}") # Synthesize into final summary
return call_llm(messages)
def synthesize_summaries(chunk_summaries: List[str]) -> str:
"""Synthesize multiple chunk summaries into a single final summary."""
combined = "\n\n".join(chunk_summaries) combined = "\n\n".join(chunk_summaries)
system_prompt = """You are a precise legal assistant creating executive-level summaries. system_prompt = """You are a precise legal assistant creating executive-level summaries.
@@ -209,71 +230,7 @@ Format as a single paragraph of plain text."""
Final summary:""" Final summary:"""
messages = [ final_summary = call_llm(user_prompt, system_prompt)
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
logger.info(f"Synthesizing {len(chunk_summaries)} chunk summaries")
return call_llm(messages)
def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -> Dict[str, Any]:
"""
Main summarization function.
- If text is short, summarize directly
- If text is long, chunk and summarize each chunk, then synthesize
"""
original_length = len(text)
text = text.strip()
if not text:
raise ValueError("Empty text provided")
logger.info(f"Summarizing text of {original_length} characters")
# Direct summarization for shorter texts
if len(text) <= MAX_DIRECT_TEXT_LENGTH:
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
Create a summary that:
- Is approximately {max_length} words
- Captures key points and important details
- Uses clear, professional language
- Preserves names, dates, and specific facts
Format as plain text without bullet points."""
user_prompt = f"""Summarize the following document:
{text}
Summary:"""
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
]
summary = call_llm(messages)
return {
"summary": summary,
"original_length": original_length,
"method": "direct",
"chunks": 1
}
# Chunked summarization for longer texts
chunks = chunk_text(text)
chunk_summaries = []
for i, chunk in enumerate(chunks, 1):
chunk_summary = summarize_chunk(chunk, i, len(chunks))
chunk_summaries.append(chunk_summary)
final_summary = synthesize_summaries(chunk_summaries)
return { return {
"summary": final_summary, "summary": final_summary,
@@ -287,7 +244,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
"""HTTP handler for MCP summary server.""" """HTTP handler for MCP summary server."""
def log_message(self, format, *args): def log_message(self, format, *args):
logger.info(format % args) # Quiet logs by default
pass
def _send_json(self, status: int, payload: Any): def _send_json(self, status: int, payload: Any):
"""Send JSON response.""" """Send JSON response."""
@@ -298,8 +256,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(body) self.wfile.write(body)
def _auth_or_401(self): def _auth_or_401(self) -> bool:
"""Check authentication. Returns False if auth fails.""" """Check authentication if API key is configured."""
try: try:
return require_auth(self.headers) return require_auth(self.headers)
except PermissionError: except PermissionError:
@@ -312,8 +270,6 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
self._send_json(200, { self._send_json(200, {
"service": "mcp-summary", "service": "mcp-summary",
"transport": "streamable-http", "transport": "streamable-http",
"model": MODEL_NAME,
"status": "running",
"docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)." "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
}) })
return return
@@ -322,7 +278,6 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
def do_POST(self): def do_POST(self):
"""Handle MCP JSON-RPC requests.""" """Handle MCP JSON-RPC requests."""
# Streamable HTTP MCP endpoint
if self.path not in ("/", "/mcp"): if self.path not in ("/", "/mcp"):
self.send_error(404, "Not Found") self.send_error(404, "Not Found")
return return
@@ -346,8 +301,6 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
params = req.get("params") or {} params = req.get("params") or {}
req_id = req.get("id") req_id = req.get("id")
logger.info(f"MCP request: method={method}, id={req_id}")
# MCP: initialize # MCP: initialize
if method == "initialize": if method == "initialize":
self._send_json(200, { self._send_json(200, {
@@ -366,15 +319,6 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
}) })
return return
# MCP: ping
if method == "ping":
self._send_json(200, {
"jsonrpc": "2.0",
"id": req_id,
"result": {}
})
return
# MCP: tools/list # MCP: tools/list
if method == "tools/list": if method == "tools/list":
self._send_json(200, { self._send_json(200, {
@@ -400,7 +344,6 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
} }
}) })
except Exception as e: except Exception as e:
logger.error(f"Tool call failed: {e}", exc_info=True)
self._send_json(200, { self._send_json(200, {
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": req_id, "id": req_id,
@@ -421,7 +364,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
if not text: if not text:
raise ValueError("Text parameter is required") raise ValueError("Text parameter is required")
max_length = args.get("max_length", MAX_DIRECT_SUMMARY_LENGTH) max_length = args.get("max_length", 100)
return summarize_document(text, max_length) return summarize_document(text, max_length)
raise ValueError(f"Unknown tool: {name}") raise ValueError(f"Unknown tool: {name}")
@@ -433,11 +376,6 @@ def main():
server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler) server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler)
mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)" mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
print(f"MCP Summary Server listening on 0.0.0.0:{port} [{mode}]") print(f"MCP Summary Server listening on 0.0.0.0:{port} [{mode}]")
print(f" - Model: {MODEL_NAME}")
print(f" - LLM URL: {OPENAPI_URL}")
print(f" - Chunk size: {CHUNK_SIZE} characters")
print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")
print(f" - LLM timeout: {LLM_TIMEOUT} seconds")
try: try:
server.serve_forever() server.serve_forever()
except KeyboardInterrupt: except KeyboardInterrupt:
+4
View File
@@ -0,0 +1,4 @@
# requirements.txt for MCP Summary Server
# HTTP requests for LLM communication
requests>=2.31.0