Initial commit: MCP Summary Server
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
# MCP Summary Server - Environment Variables
|
||||
|
||||
# Server Configuration
|
||||
PORT=8080
|
||||
|
||||
# Authentication (optional)
|
||||
# If set, requests must include: Authorization: Bearer <API_KEY>
|
||||
API_KEY=
|
||||
|
||||
# LLM Configuration
|
||||
OPENAPI_URL=http://localhost:8080/v1
|
||||
OPENAPI_API_KEY=
|
||||
MODEL_NAME=gpt-4o
|
||||
|
||||
# Summarization Configuration
|
||||
# Characters per chunk when splitting long text
|
||||
CHUNK_SIZE=4000
|
||||
|
||||
# Characters of overlap between chunks to maintain context
|
||||
OVERLAP=200
|
||||
|
||||
# Target length for intermediate chunk summaries (words)
|
||||
TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
|
||||
|
||||
# Maximum length for final synthesized summary (words)
|
||||
MAX_DIRECT_SUMMARY_LENGTH=100
|
||||
|
||||
# Maximum text length (characters) before chunking is triggered
|
||||
MAX_DIRECT_TEXT_LENGTH=8000
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
# Dockerfile for MCP Summary Server
|
||||
#
|
||||
# Usage (from the directory containing this Dockerfile, requirements.txt, and mcp_summary_server.py):
|
||||
#
|
||||
# docker build -t mcp-summary .
|
||||
# docker run -p 8080:8080 --env-file .env mcp-summary
|
||||
#
|
||||
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install runtime dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
|
||||
|
||||
# Copy the server script
|
||||
COPY mcp_summary_server.py /app/mcp_summary_server.py
|
||||
|
||||
# Expose HTTP port
|
||||
EXPOSE 8080
|
||||
|
||||
# Environment variables
|
||||
ENV PORT=8080
|
||||
ENV OPENAPI_URL=http://localhost:8080/v1
|
||||
ENV OPENAPI_API_KEY=""
|
||||
ENV MODEL_NAME=gpt-4o
|
||||
ENV CHUNK_SIZE=4000
|
||||
ENV OVERLAP=200
|
||||
ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
|
||||
ENV MAX_DIRECT_SUMMARY_LENGTH=100
|
||||
ENV MAX_DIRECT_TEXT_LENGTH=8000
|
||||
ENV API_KEY=""
|
||||
|
||||
# Start the MCP summary server
|
||||
ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
|
||||
@@ -0,0 +1,80 @@
|
||||
# MCP Summary Server
|
||||
|
||||
An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
|
||||
|
||||
## Features
|
||||
|
||||
- Automatically determines whether to summarize directly or use chunked summarization
|
||||
- All processing happens server-side
|
||||
- Returns only the summary to the client
|
||||
- Configurable chunking parameters
|
||||
- Bearer token authentication (optional)
|
||||
|
||||
## Setup
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Copy `.env.example` to `.env` and configure:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| PORT | 8080 | HTTP server port |
|
||||
| API_KEY | (empty) | Bearer token for authentication |
|
||||
| OPENAPI_URL | http://localhost:8080/v1 | LLM API endpoint |
|
||||
| OPENAPI_API_KEY | (empty) | LLM API key |
|
||||
| MODEL_NAME | gpt-4o | LLM model to use |
|
||||
| CHUNK_SIZE | 4000 | Characters per chunk |
|
||||
| OVERLAP | 200 | Characters of overlap between chunks |
|
||||
| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
|
||||
| MAX_DIRECT_SUMMARY_LENGTH | 100 | Default maximum final summary length (words) |
|
||||
| MAX_DIRECT_TEXT_LENGTH | 8000 | Maximum text length (characters) before chunking is triggered |
|
||||
|
||||
## Running
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
# Build
|
||||
docker build -t mcp-summary .
|
||||
|
||||
# Run with environment file
|
||||
docker run -p 8080:8080 --env-file .env mcp-summary
|
||||
|
||||
# Run with inline environment variables
|
||||
docker run -p 8080:8080 \
|
||||
-e OPENAPI_URL=http://localhost:8080/v1 \
|
||||
-e OPENAPI_API_KEY=your-key \
|
||||
-e MODEL_NAME=gpt-4o \
|
||||
mcp-summary
|
||||
```
|
||||
|
||||
### Python
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
python mcp_summary_server.py
|
||||
```
|
||||
|
||||
## MCP Tool
|
||||
|
||||
### summarize_document
|
||||
|
||||
Summarizes a document, automatically handling chunking for long text.
|
||||
|
||||
**Parameters:**
|
||||
- `text` (string, required): The document text to summarize
|
||||
- `max_length` (integer, optional): Maximum summary length in words (default: 100)
|
||||
|
||||
**Returns:**
|
||||
```json
|
||||
{
|
||||
"summary": "The summarized text...",
|
||||
"original_length": 12345,
|
||||
"method": "direct", // or "chunked"
|
||||
"chunks": 1 // number of chunks used
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,426 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MCP Summary Server (Streamable HTTP transport)
|
||||
|
||||
Designed to work with OpenWebUI's MCP (Streamable HTTP) integration.
|
||||
|
||||
Summarizes documents by:
|
||||
1. Checking text length
|
||||
2. If short, summarizing directly with LLM
|
||||
3. If long, chunking text, summarizing each chunk, then synthesizing
|
||||
|
||||
All processing happens server-side, keeping full text out of the chat context window.
|
||||
|
||||
Tools:
|
||||
- summarize_document: Summarize a document (handles chunking automatically)
|
||||
|
||||
Auth:
|
||||
- If API_KEY is set:
|
||||
- Requires: Authorization: Bearer <API_KEY>
|
||||
- If API_KEY is not set:
|
||||
- No auth required (for local/internal use).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from typing import Any, Dict, List, Optional
|
||||
import requests
|
||||
|
||||
# MCP Server Configuration


def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    An unset, empty, or whitespace-only variable yields ``default``. Env files
    often carry lines such as ``NAME=`` which set the variable to an empty
    string; ``int("")`` would otherwise abort the server at import time.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or blank.

    Returns:
        The parsed integer, or ``default``.

    Raises:
        ValueError: if the variable holds a non-blank, non-integer value. The
            message names the offending variable.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


API_KEY = os.environ.get("API_KEY", "").strip()
PORT = _env_int("PORT", 8080)

# LLM Configuration
OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")

# Summarization Configuration
CHUNK_SIZE = _env_int("CHUNK_SIZE", 4000)  # Characters per chunk
OVERLAP = _env_int("OVERLAP", 200)  # Characters of overlap between chunks
TARGET_INTERMEDIATE_SUMMARY_LENGTH = _env_int("TARGET_INTERMEDIATE_SUMMARY_LENGTH", 150)  # Words
MAX_DIRECT_SUMMARY_LENGTH = _env_int("MAX_DIRECT_SUMMARY_LENGTH", 100)  # Words for final summary
MAX_DIRECT_TEXT_LENGTH = _env_int("MAX_DIRECT_TEXT_LENGTH", 8000)  # Characters before chunking

# Tool definitions
# Returned verbatim as the result of the MCP "tools/list" method.
TOOLS_LIST: Dict[str, Any] = {
    "tools": [
        {
            "name": "summarize_document",
            "description": "Summarize a document. Automatically handles chunking for long text. Returns a concise summary without exposing the full text.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "The document text to summarize"
                    },
                    "max_length": {
                        "type": "integer",
                        "description": f"Maximum length of summary in words (default: {MAX_DIRECT_SUMMARY_LENGTH})"
                    }
                },
                "required": ["text"]
            }
        }
    ]
}
|
||||
|
||||
|
||||
def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
    """Make an OpenAI-compatible chat-completion call and return the reply text.

    Posts to ``<OPENAPI_URL>/chat/completions`` using ``MODEL_NAME``. The
    ``Authorization`` header is only sent when ``OPENAPI_API_KEY`` is
    non-empty, so key-less local endpoints do not receive a malformed
    ``Bearer `` header with an empty token.

    Args:
        messages: Chat messages, each a dict with ``role`` and ``content``.
        temperature: Sampling temperature forwarded in the request payload.

    Returns:
        The ``content`` string of the first choice in the response.

    Raises:
        requests.RequestException: on connection failure, timeout (60 s), or
            a non-2xx HTTP status.
        ValueError: if the response body does not contain a text completion
            at ``choices[0].message.content``.
    """
    # Tolerate a trailing slash in the configured base URL.
    url = f"{OPENAPI_URL.rstrip('/')}/chat/completions"

    headers = {"Content-Type": "application/json"}
    if OPENAPI_API_KEY:
        headers["Authorization"] = f"Bearer {OPENAPI_API_KEY}"

    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": 2000,
        "top_p": 0.9,
    }

    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()

    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError("LLM response did not contain a completion message") from exc
    if not isinstance(content, str):
        raise ValueError("LLM response did not contain a text completion")
    return content
|
||||
|
||||
|
||||
def chunk_text(
    text: str,
    chunk_size: Optional[int] = None,
    overlap: Optional[int] = None,
) -> List[str]:
    """Split text into overlapping chunks for summarization.

    Each chunk is at most ``chunk_size`` characters. Within the second half of
    every window the splitter prefers, in order: the last paragraph break
    (blank line), the last line break, then the last sentence terminator
    (``.``, ``!`` or ``?``, which stays with the chunk it ends). If none is
    found the window is cut at ``chunk_size``. Once the remaining text fits
    in a single window it is emitted whole instead of being split again.

    Args:
        text: The text to split.
        chunk_size: Maximum characters per chunk; defaults to ``CHUNK_SIZE``.
        overlap: Characters repeated from the end of one chunk at the start of
            the next; defaults to ``OVERLAP``. Negative values count as 0.

    Returns:
        The chunks in document order. Whitespace-only chunks are dropped.
        Text no longer than ``chunk_size`` is returned as a single chunk.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    lap = OVERLAP if overlap is None else overlap
    if size <= 0:
        raise ValueError("chunk_size must be a positive number of characters")
    lap = max(lap, 0)

    total = len(text)
    if total <= size:
        return [text]

    chunks: List[str] = []
    start = 0

    while start < total:
        end = min(start + size, total)

        if end == total:
            # The rest fits in one window: take it whole.
            break_point = total
        else:
            # Only look for a boundary in the second half of the window so
            # chunks stay reasonably full; the floor is always past `start`.
            floor = start + max(1, size // 2)
            break_point = -1

            # Paragraph break first, then a single line break.
            for marker in ("\n\n", "\n"):
                pos = text.rfind(marker, floor, end)
                if pos != -1:
                    break_point = pos
                    break

            # Otherwise the latest sentence terminator, kept with this chunk.
            if break_point == -1:
                sentence_end = max(text.rfind(mark, floor, end) for mark in ".!?")
                if sentence_end != -1:
                    break_point = sentence_end + 1

            if break_point == -1:
                break_point = end

        chunk = text[start:break_point]
        if chunk.strip():
            chunks.append(chunk)

        if break_point >= total:
            break

        # Step back by the overlap, but always move forward: an overlap that
        # is too large for the window would otherwise loop forever.
        next_start = break_point - lap
        start = next_start if next_start > start else break_point

    return chunks
|
||||
|
||||
|
||||
def summarize_chunk(chunk: str, chunk_num: int, total_chunks: int) -> str:
    """Summarize a single chunk of a larger document.

    Args:
        chunk: The chunk text to summarize.
        chunk_num: 1-based position of this chunk, quoted in the prompt.
        total_chunks: Total number of chunks, quoted in the prompt.

    Returns:
        The summary text returned by ``call_llm``; the prompt asks for about
        ``TARGET_INTERMEDIATE_SUMMARY_LENGTH`` words.

    Raises:
        Whatever ``call_llm`` raises when the LLM request fails.
    """
    system_prompt = f"""You are a precise legal assistant specializing in creating concise, accurate summaries.

You are processing chunk {chunk_num} of {total_chunks} from a larger document.

Your task: Create a focused summary of this chunk that:
- Captures the key points and important details
- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
- Can be combined with summaries of other chunks to form a complete picture
- Uses clear, professional language
- Preserves important names, dates, and specific facts

Format your response as plain text without bullet points or special formatting."""

    # The chunk itself goes into the prompt; the previous `{text}` placeholder
    # referenced a name that does not exist in this function.
    user_prompt = f"""Summarize the following text (chunk {chunk_num} of {total_chunks}):

{chunk}

Summary:"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    return call_llm(messages)
|
||||
|
||||
|
||||
def synthesize_summaries(chunk_summaries: List[str], max_length: Optional[int] = None) -> str:
    """Synthesize multiple chunk summaries into a single final summary.

    Args:
        chunk_summaries: Per-chunk summaries in document order.
        max_length: Target length of the final summary in words; defaults to
            ``MAX_DIRECT_SUMMARY_LENGTH`` when omitted or ``None``.

    Returns:
        The final summary text returned by ``call_llm``.

    Raises:
        Whatever ``call_llm`` raises when the LLM request fails.
    """
    target_words = MAX_DIRECT_SUMMARY_LENGTH if max_length is None else max_length
    combined = "\n\n".join(chunk_summaries)

    system_prompt = f"""You are a precise legal assistant creating executive-level summaries.

Your task: Synthesize the provided partial summaries into a single, cohesive summary that:
- Is approximately {target_words} words
- Captures the complete picture of the document
- Is clear and professional
- Removes redundancy
- Maintains logical flow
- Preserves all critical information

Format your response as a single paragraph of plain text."""

    user_prompt = f"""Synthesize the following partial summaries into one cohesive summary:

{combined}

Final summary:"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    return call_llm(messages)


def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -> Dict[str, Any]:
    """Summarize a document, chunking it first when it is long.

    Text of at most ``MAX_DIRECT_TEXT_LENGTH`` characters (after stripping) is
    summarized with one LLM call. Longer text is split with ``chunk_text``,
    each chunk is summarized, and the chunk summaries are synthesized into a
    final summary. ``max_length`` is applied on both paths.

    Args:
        text: The document text.
        max_length: Target summary length in words. ``None`` falls back to
            ``MAX_DIRECT_SUMMARY_LENGTH``.

    Returns:
        A dict with keys ``summary``, ``original_length`` (characters in the
        input before stripping), ``method`` (``"direct"`` or ``"chunked"``)
        and ``chunks`` (number of chunks summarized).

    Raises:
        ValueError: if ``text`` is empty or whitespace-only.
    """
    original_length = len(text)

    # Strip whitespace and validate
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")

    # A JSON null from the client must not end up in the prompt as "None".
    if max_length is None:
        max_length = MAX_DIRECT_SUMMARY_LENGTH

    # Direct summarization for shorter texts
    if len(text) <= MAX_DIRECT_TEXT_LENGTH:
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.

Your task: Create a summary that:
- Is approximately {max_length} words
- Captures the key points and important details
- Uses clear, professional language
- Preserves important names, dates, and specific facts
- Is suitable for a legal professional

Format your response as plain text without bullet points or special formatting."""

        user_prompt = f"""Summarize the following document:

{text}

Summary:"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        return {
            "summary": call_llm(messages),
            "original_length": original_length,
            "method": "direct",
            "chunks": 1,
        }

    # Chunked summarization for longer texts
    chunks = chunk_text(text)
    chunk_summaries = [
        summarize_chunk(chunk, index, len(chunks))
        for index, chunk in enumerate(chunks, 1)
    ]

    # Synthesize into a final summary of the requested length
    final_summary = synthesize_summaries(chunk_summaries, max_length)

    return {
        "summary": final_summary,
        "original_length": original_length,
        "method": "chunked",
        "chunks": len(chunks),
    }
|
||||
|
||||
|
||||
class MCPSummaryHandler(BaseHTTPRequestHandler):
    """HTTP handler for the MCP summary server.

    GET ``/`` returns a small service description. POST ``/`` or ``/mcp``
    accepts a single MCP JSON-RPC message (``initialize``, ``tools/list``,
    ``tools/call``). When ``API_KEY`` is set, POST requests must carry
    ``Authorization: Bearer <API_KEY>``.
    """

    def log_message(self, format, *args):
        # Quiet logs by default
        pass

    def _send_json(self, status: int, payload: Any):
        """Send ``payload`` as a UTF-8 JSON response with the given status."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _auth_or_401(self) -> bool:
        """Check the bearer token if an API key is configured.

        Returns:
            True when the request may proceed. Otherwise a 401 response has
            already been sent and False is returned.
        """
        if not API_KEY:
            return True

        # Local import: keeps this class self-contained with respect to the
        # module's import block.
        import hmac

        auth_header = self.headers.get("Authorization", "")
        if not auth_header.startswith("Bearer "):
            self._send_json(401, {"error": "Missing or invalid API key"})
            return False

        token = auth_header[len("Bearer "):].strip()
        # Constant-time comparison so response timing does not reveal how much
        # of the token matched. Bytes are used because compare_digest rejects
        # non-ASCII str arguments.
        if not hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
            self._send_json(401, {"error": "Invalid API key"})
            return False

        return True

    def do_GET(self):
        """Handle GET requests (health check)."""
        if self.path == "/":
            self._send_json(200, {
                "service": "mcp-summary",
                "transport": "streamable-http",
                "model": MODEL_NAME,
                "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
            })
            return

        self.send_error(404, "Not Found")

    def do_POST(self):
        """Handle MCP JSON-RPC requests."""
        if self.path not in ("/", "/mcp"):
            self.send_error(404, "Not Found")
            return

        if not self._auth_or_401():
            return

        # Parse request
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            self._send_json(400, {"error": "Invalid Content-Length"})
            return
        # A negative length would make rfile.read() wait for end of stream.
        if length <= 0:
            self._send_json(400, {"error": "Empty body"})
            return

        raw = self.rfile.read(length)
        try:
            req = json.loads(raw)
        except ValueError:
            # Covers json.JSONDecodeError and undecodable (non-UTF) bytes.
            self._send_json(400, {"error": "Invalid JSON"})
            return

        if not isinstance(req, dict):
            self._send_json(400, {"error": "Request body must be a JSON object"})
            return

        method = req.get("method")
        params = req.get("params") or {}
        req_id = req.get("id")

        # MCP notifications (e.g. notifications/initialized) expect no
        # JSON-RPC response; acknowledge with 202 Accepted and an empty body.
        if isinstance(method, str) and method.startswith("notifications/"):
            self.send_response(202)
            self.send_header("Content-Length", "0")
            self.end_headers()
            return

        # MCP: initialize
        if method == "initialize":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "protocolVersion": "2025-11-25",
                    "capabilities": {
                        "tools": {}
                    },
                    "serverInfo": {
                        "name": "mcp-summary",
                        "version": "1.0.0"
                    }
                }
            })
            return

        # MCP: tools/list
        if method == "tools/list":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": TOOLS_LIST
            })
            return

        # MCP: tools/call
        if method == "tools/call":
            try:
                if not isinstance(params, dict):
                    raise ValueError("params must be an object")
                tool_args = params.get("arguments") or {}
                if not isinstance(tool_args, dict):
                    raise ValueError("arguments must be an object")
                result = self._call_tool(params.get("name"), tool_args)
            except Exception as e:
                # Request boundary: any tool failure becomes a JSON-RPC error
                # for the client instead of a dropped connection.
                self._send_json(200, {
                    "jsonrpc": "2.0",
                    "id": req_id,
                    "error": {
                        "code": -32000,
                        "message": str(e)
                    }
                })
                return

            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "content": [
                        {"type": "text", "text": json.dumps(result, ensure_ascii=False)}
                    ]
                }
            })
            return

        # Unknown method
        self._send_json(400, {"error": "Unknown method: " + str(method)})

    def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
        """Validate arguments and execute a tool call.

        Args:
            name: Tool name; only ``summarize_document`` is supported.
            args: Tool arguments: ``text`` (required string) and
                ``max_length`` (optional positive integer, in words).

        Returns:
            The result dict produced by ``summarize_document``.

        Raises:
            ValueError: for an unknown tool, missing or non-string ``text``,
                or a ``max_length`` that is not a positive integer.
        """
        if name != "summarize_document":
            raise ValueError(f"Unknown tool: {name}")

        text = args.get("text")
        if not text:
            raise ValueError("Text parameter is required")
        if not isinstance(text, str):
            raise ValueError("Text parameter must be a string")

        max_length = args.get("max_length")
        if max_length is None:
            max_length = MAX_DIRECT_SUMMARY_LENGTH
        else:
            # JSON clients may send whole numbers as floats (e.g. 100.0).
            if isinstance(max_length, float) and max_length.is_integer():
                max_length = int(max_length)
            # bool is a subclass of int, so it is rejected explicitly.
            if isinstance(max_length, bool) or not isinstance(max_length, int) or max_length <= 0:
                raise ValueError("max_length must be a positive integer")

        return summarize_document(text, max_length)
|
||||
|
||||
|
||||
def main():
    """Start the MCP summary server and serve until interrupted.

    Binds to 0.0.0.0 on ``PORT``, prints the effective configuration, and
    serves requests until Ctrl-C. The listening socket is closed on the way
    out, including when ``serve_forever`` fails for another reason.
    """
    # Local import: the module-level import line only brings in HTTPServer.
    from http.server import ThreadingHTTPServer

    # One thread per request: a long summarization (several LLM calls with a
    # 60 s timeout each) must not block health checks or other MCP calls.
    server = ThreadingHTTPServer(("0.0.0.0", PORT), MCPSummaryHandler)
    mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
    print(f"MCP Summary Server listening on 0.0.0.0:{PORT} [{mode}]")
    print(f" - Model: {MODEL_NAME}")
    print(f" - Chunk size: {CHUNK_SIZE} characters")
    print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        server.server_close()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,4 @@
|
||||
# requirements.txt for MCP Summary Server
|
||||
|
||||
# HTTP requests for LLM communication
|
||||
requests>=2.31.0
|
||||
Reference in New Issue
Block a user