commit dbddfcd61d9f064dfe20119b62a276bea14aa14f Author: Admin Date: Sat Jun 13 22:36:29 2026 +0000 Initial commit: MCP Summary Server diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d4cfbbe --- /dev/null +++ b/.env.example @@ -0,0 +1,29 @@ +# MCP Summary Server - Environment Variables + +# Server Configuration +PORT=8080 + +# Authentication (optional) +# If set, requests must include: Authorization: Bearer <API_KEY> +API_KEY= + +# LLM Configuration +OPENAPI_URL=http://localhost:8080/v1 +OPENAPI_API_KEY= +MODEL_NAME=gpt-4o + +# Summarization Configuration +# Characters per chunk when splitting long text +CHUNK_SIZE=4000 + +# Characters of overlap between chunks to maintain context +OVERLAP=200 + +# Target length for intermediate chunk summaries (words) +TARGET_INTERMEDIATE_SUMMARY_LENGTH=150 + +# Maximum length for final synthesized summary (words) +MAX_DIRECT_SUMMARY_LENGTH=100 + +# Maximum text length (characters) before chunking is triggered +MAX_DIRECT_TEXT_LENGTH=8000 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..1e02c74 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# Dockerfile for MCP Summary Server +# +# Usage (from directory containing this Dockerfile and mcp_summary_server.py): +# +# docker build -t mcp-summary . 
+RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt + +# Copy the server script +COPY mcp_summary_server.py /app/mcp_summary_server.py + +# Expose HTTP port +EXPOSE 8080 + +# Environment variables +ENV PORT=8080 +ENV OPENAPI_URL=http://localhost:8080/v1 +ENV OPENAPI_API_KEY="" +ENV MODEL_NAME=gpt-4o +ENV CHUNK_SIZE=4000 +ENV OVERLAP=200 +ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150 +ENV MAX_DIRECT_SUMMARY_LENGTH=100 +ENV MAX_DIRECT_TEXT_LENGTH=8000 +ENV API_KEY="" + +# Start the MCP summary server +ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..ade9adf --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +# MCP Summary Server + +An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window. + +## Features + +- Automatically determines whether to summarize directly or use chunked summarization +- All processing happens server-side +- Returns only the summary to the client +- Configurable chunking parameters +- Bearer token authentication (optional) + +## Setup + +### Environment Variables + +Copy `.env.example` to `.env` and configure: + +```bash +cp .env.example .env +``` + +| Variable | Default | Description | +|----------|---------|-------------| +| PORT | 8080 | HTTP server port | +| API_KEY | (empty) | Bearer token for authentication | +| OPENAPI_URL | http://localhost:8080/v1 | LLM API endpoint | +| OPENAPI_API_KEY | (empty) | LLM API key | +| MODEL_NAME | gpt-4o | LLM model to use | +| CHUNK_SIZE | 4000 | Characters per chunk | +| OVERLAP | 200 | Characters of overlap between chunks | +| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary | +| MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length | +| MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length before chunking | + +## Running + +### Docker + +```bash +# Build +docker build -t mcp-summary . 
+ +# Run with environment file +docker run -p 8080:8080 --env-file .env mcp-summary + +# Run with inline environment variables +docker run -p 8080:8080 \ + -e OPENAPI_URL=http://localhost:8080/v1 \ + -e OPENAPI_API_KEY=your-key \ + -e MODEL_NAME=gpt-4o \ + mcp-summary +``` + +### Python + +```bash +pip install -r requirements.txt +python mcp_summary_server.py +``` + +## MCP Tool + +### summarize_document + +Summarizes a document, automatically handling chunking for long text. + +**Parameters:** +- `text` (string, required): The document text to summarize +- `max_length` (integer, optional): Maximum summary length in words (default: 100) + +**Returns:** +```json +{ + "summary": "The summarized text...", + "original_length": 12345, + "method": "direct", // or "chunked" + "chunks": 1 // number of chunks used +} +``` diff --git a/mcp_summary_server.py b/mcp_summary_server.py new file mode 100644 index 0000000..8d53383 --- /dev/null +++ b/mcp_summary_server.py @@ -0,0 +1,426 @@ +#!/usr/bin/env python3 +""" +MCP Summary Server (Streamable HTTP transport) + +Designed to work with OpenWebUI's MCP (Streamable HTTP) integration. + +Summarizes documents by: +1. Checking text length +2. If short, summarizing directly with LLM +3. If long, chunking text, summarizing each chunk, then synthesizing + +All processing happens server-side, keeping full text out of the chat context window. + +Tools: +- summarize_document: Summarize a document (handles chunking automatically) + +Auth: +- If API_KEY is set: + - Requires: Authorization: Bearer <API_KEY> +- If API_KEY is not set: + - No auth required (for local/internal use). 
import hmac
import json
import os
import sys
import re
from http.server import HTTPServer, BaseHTTPRequestHandler
from typing import Any, Dict, List, Optional

import requests  # third-party; requirements.txt pins requests>=2.31.0

# MCP Server Configuration
API_KEY = os.environ.get("API_KEY", "").strip()
PORT = int(os.environ.get("PORT", "8080"))

# LLM Configuration
OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")

# Summarization Configuration
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))  # Characters per chunk
OVERLAP = int(os.environ.get("OVERLAP", "200"))  # Characters of overlap between chunks
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))  # Words
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))  # Words for final summary
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))  # Characters before chunking

# JSON-RPC 2.0 error codes used in responses.
_JSONRPC_INVALID_REQUEST = -32600
_JSONRPC_METHOD_NOT_FOUND = -32601
_JSONRPC_SERVER_ERROR = -32000

# Tool definitions
TOOLS_LIST: Dict[str, Any] = {
    "tools": [
        {
            "name": "summarize_document",
            "description": "Summarize a document. Automatically handles chunking for long text. Returns a concise summary without exposing the full text.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "The document text to summarize"
                    },
                    "max_length": {
                        "type": "integer",
                        "description": f"Maximum length of summary in words (default: {MAX_DIRECT_SUMMARY_LENGTH})"
                    }
                },
                "required": ["text"]
            }
        }
    ]
}


def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
    """Send a chat-completions request to the configured LLM endpoint.

    Args:
        messages: Chat messages (``role``/``content`` dicts) forwarded as-is.
        temperature: Sampling temperature placed in the request payload.

    Returns:
        The ``content`` of the first choice in the response.

    Raises:
        requests.RequestException: On transport errors, timeouts or a
            non-2xx status (via ``raise_for_status``).
        ValueError: If the response body is not JSON or lacks the
            ``choices[0].message.content`` string.
    """
    # Tolerate a trailing slash in OPENAPI_URL so the path never contains "//".
    url = f"{OPENAPI_URL.rstrip('/')}/chat/completions"

    headers = {"Content-Type": "application/json"}
    # Only send credentials when a key is configured; an empty
    # "Authorization: Bearer " header is malformed.
    if OPENAPI_API_KEY:
        headers["Authorization"] = f"Bearer {OPENAPI_API_KEY}"

    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": 2000,
        "top_p": 0.9
    }

    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()

    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ValueError("Unexpected LLM response format") from exc
    if not isinstance(content, str):
        raise ValueError("Unexpected LLM response format: content is not text")
    return content


def _find_break_point(text: str, lo: int, hi: int) -> int:
    """Return the index at which to end a chunk, searching ``text[lo:hi]``.

    Preference order: the last blank-line paragraph break, then the last
    newline, then the last sentence terminator (``.``, ``!`` or ``?``).
    The returned index is just past the marker so the marker stays with the
    text it terminates. Falls back to ``hi`` when no marker is found.
    """
    for marker in ("\n\n", "\n"):
        pos = text.rfind(marker, lo, hi)
        if pos != -1:
            return pos + len(marker)

    # Take the latest sentence terminator of any kind, not the first kind found.
    pos = max(text.rfind(marker, lo, hi) for marker in (".", "!", "?"))
    if pos != -1:
        return pos + 1

    return hi


def chunk_text(text: str) -> List[str]:
    """Split text into overlapping chunks of at most ``CHUNK_SIZE`` characters.

    Each chunk ends at a paragraph, line or sentence boundary found in the
    second half of its window when one exists. Consecutive chunks share up to
    ``OVERLAP`` characters. Whitespace-only chunks are dropped.

    Args:
        text: The text to split.

    Returns:
        The chunks in document order; ``[text]`` when the text already fits
        in a single chunk.
    """
    # Guard against a zero or negative CHUNK_SIZE, which would never advance.
    chunk_size = max(1, CHUNK_SIZE)
    if len(text) <= chunk_size:
        return [text]

    text_len = len(text)
    chunks: List[str] = []
    start = 0

    while start < text_len:
        end = start + chunk_size
        if end >= text_len:
            # The remainder fits in one chunk: take it whole instead of
            # splitting off a tiny trailing fragment.
            piece = text[start:]
            next_start = text_len
        else:
            break_point = _find_break_point(text, start + chunk_size // 2, end)
            piece = text[start:break_point]
            # Cap the overlap at half the chunk just produced so the window
            # always advances, even when OVERLAP is misconfigured to be
            # >= CHUNK_SIZE // 2 (which previously looped forever).
            overlap = max(0, min(OVERLAP, (break_point - start) // 2))
            next_start = break_point - overlap

        if piece.strip():
            chunks.append(piece)
        start = next_start

    return chunks


def summarize_chunk(chunk: str, chunk_num: int, total_chunks: int) -> str:
    """Summarize one chunk of a larger document via the LLM.

    Args:
        chunk: The chunk text to summarize.
        chunk_num: 1-based position of this chunk, used in the prompt.
        total_chunks: Total number of chunks, used in the prompt.

    Returns:
        The summary text returned by ``call_llm``.
    """
    system_prompt = f"""You are a precise legal assistant specializing in creating concise, accurate summaries.

You are processing chunk {chunk_num} of {total_chunks} from a larger document.

Your task: Create a focused summary of this chunk that:
- Captures the key points and important details
- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
- Can be combined with summaries of other chunks to form a complete picture
- Uses clear, professional language
- Preserves important names, dates, and specific facts

Format your response as plain text without bullet points or special formatting."""

    # The chunk itself must be interpolated here; the previous ``{text}``
    # referenced an undefined name and raised NameError on every call.
    user_prompt = f"""Summarize the following text (chunk {chunk_num} of {total_chunks}):

{chunk}

Summary:"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    return call_llm(messages)


def synthesize_summaries(
    chunk_summaries: List[str],
    max_length: int = MAX_DIRECT_SUMMARY_LENGTH,
) -> str:
    """Combine per-chunk summaries into a single final summary via the LLM.

    Args:
        chunk_summaries: Partial summaries in document order.
        max_length: Target length of the final summary in words. Defaults to
            ``MAX_DIRECT_SUMMARY_LENGTH`` so existing one-argument callers
            keep working.

    Returns:
        The synthesized summary text returned by ``call_llm``.
    """
    combined = "\n\n".join(chunk_summaries)

    system_prompt = f"""You are a precise legal assistant creating executive-level summaries.

Your task: Synthesize the provided partial summaries into a single, cohesive summary that:
- Is approximately {max_length} words
- Captures the complete picture of the document
- Is clear and professional
- Removes redundancy
- Maintains logical flow
- Preserves all critical information

Format your response as a single paragraph of plain text."""

    user_prompt = f"""Synthesize the following partial summaries into one cohesive summary:

{combined}

Final summary:"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    return call_llm(messages)


def _normalize_max_length(max_length: Any) -> int:
    """Coerce a caller-supplied ``max_length`` to a positive int.

    ``None`` selects ``MAX_DIRECT_SUMMARY_LENGTH``. Values accepted by
    ``int()`` (e.g. ``"120"`` or ``120.0``) are converted; anything else, or a
    non-positive result, raises ``ValueError``.
    """
    if max_length is None:
        return MAX_DIRECT_SUMMARY_LENGTH
    try:
        value = int(max_length)
    except (TypeError, ValueError, OverflowError):
        raise ValueError("max_length must be a positive integer") from None
    if value <= 0:
        raise ValueError("max_length must be a positive integer")
    return value


def summarize_document(
    text: str,
    max_length: Optional[int] = MAX_DIRECT_SUMMARY_LENGTH,
) -> Dict[str, Any]:
    """Summarize a document, chunking it first when it is long.

    Text of at most ``MAX_DIRECT_TEXT_LENGTH`` characters (after stripping) is
    summarized in one LLM call. Longer text is split with ``chunk_text``, each
    chunk is summarized, and the partial summaries are synthesized.

    Args:
        text: The document text.
        max_length: Target summary length in words; ``None`` selects the
            configured default.

    Returns:
        A dict with ``summary``, ``original_length`` (characters before
        stripping), ``method`` (``"direct"`` or ``"chunked"``) and ``chunks``.

    Raises:
        ValueError: If ``text`` is not a string or is empty/whitespace, or if
            ``max_length`` is not a positive integer.
    """
    if not isinstance(text, str):
        raise ValueError("Text parameter must be a string")
    max_length = _normalize_max_length(max_length)

    original_length = len(text)

    # Strip whitespace and validate
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")

    # Direct summarization for shorter texts
    if len(text) <= MAX_DIRECT_TEXT_LENGTH:
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.

Your task: Create a summary that:
- Is approximately {max_length} words
- Captures the key points and important details
- Uses clear, professional language
- Preserves important names, dates, and specific facts
- Is suitable for a legal professional

Format your response as plain text without bullet points or special formatting."""

        user_prompt = f"""Summarize the following document:

{text}

Summary:"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        return {
            "summary": call_llm(messages),
            "original_length": original_length,
            "method": "direct",
            "chunks": 1
        }

    # Chunked summarization for longer texts
    chunks = chunk_text(text)
    chunk_summaries = [
        summarize_chunk(chunk, index, len(chunks))
        for index, chunk in enumerate(chunks, 1)
    ]

    # Honour the caller's requested length in the final synthesis as well.
    final_summary = synthesize_summaries(chunk_summaries, max_length)

    return {
        "summary": final_summary,
        "original_length": original_length,
        "method": "chunked",
        "chunks": len(chunks)
    }


class MCPSummaryHandler(BaseHTTPRequestHandler):
    """HTTP handler implementing the MCP JSON-RPC endpoints of this server.

    ``GET /`` returns a small service descriptor. ``POST /`` and ``POST /mcp``
    accept a single JSON-RPC message and handle ``initialize``, ``ping``,
    ``tools/list`` and ``tools/call``.
    """

    def log_message(self, format, *args):
        # Quiet logs by default
        pass

    def _send_json(self, status: int, payload: Any):
        """Serialize ``payload`` as UTF-8 JSON and send it with ``status``."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_accepted(self):
        """Send ``202 Accepted`` with no body (reply to a notification)."""
        self.send_response(202)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def _send_rpc_error(self, req_id: Any, code: int, message: str, status: int = 200):
        """Send a JSON-RPC 2.0 error object for request ``req_id``."""
        self._send_json(status, {
            "jsonrpc": "2.0",
            "id": req_id,
            "error": {
                "code": code,
                "message": message
            }
        })

    def _auth_or_401(self) -> bool:
        """Check the bearer token when ``API_KEY`` is configured.

        Returns:
            True if the request may proceed. Otherwise a 401 response has
            already been sent and False is returned.
        """
        if not API_KEY:
            return True

        auth_header = self.headers.get("Authorization", "")
        if not auth_header.startswith("Bearer "):
            self._send_json(401, {"error": "Missing or invalid API key"})
            return False

        token = auth_header[len("Bearer "):].strip()
        # Constant-time comparison avoids leaking the key through timing.
        # Compare bytes because compare_digest rejects non-ASCII str values.
        if not hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
            self._send_json(401, {"error": "Invalid API key"})
            return False

        return True

    def do_GET(self):
        """Handle GET requests (health check)."""
        if self.path == "/":
            self._send_json(200, {
                "service": "mcp-summary",
                "transport": "streamable-http",
                "model": MODEL_NAME,
                "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
            })
            return

        self.send_error(404, "Not Found")

    def do_POST(self):
        """Handle MCP JSON-RPC requests."""
        if self.path not in ("/", "/mcp"):
            self.send_error(404, "Not Found")
            return

        if not self._auth_or_401():
            return

        # Parse request
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            self._send_json(400, {"error": "Invalid Content-Length"})
            return
        # A negative length would make rfile.read() block until EOF.
        if length <= 0:
            self._send_json(400, {"error": "Empty body"})
            return

        raw = self.rfile.read(length)
        try:
            req = json.loads(raw)
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError (bad UTF-8).
            self._send_json(400, {"error": "Invalid JSON"})
            return

        if not isinstance(req, dict):
            self._send_rpc_error(None, _JSONRPC_INVALID_REQUEST, "Invalid Request", status=400)
            return

        method = req.get("method")
        params = req.get("params") or {}
        if not isinstance(params, dict):
            params = {}
        req_id = req.get("id")

        # MCP: initialize
        if method == "initialize":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "protocolVersion": "2025-11-25",
                    "capabilities": {
                        "tools": {}
                    },
                    "serverInfo": {
                        "name": "mcp-summary",
                        "version": "1.0.0"
                    }
                }
            })
            return

        # MCP: ping (liveness check; the result is an empty object)
        if method == "ping":
            self._send_json(200, {"jsonrpc": "2.0", "id": req_id, "result": {}})
            return

        # MCP: tools/list
        if method == "tools/list":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": TOOLS_LIST
            })
            return

        # MCP: tools/call
        if method == "tools/call":
            tool_name = params.get("name")
            tool_args = params.get("arguments") or {}

            try:
                result = self._call_tool(tool_name, tool_args)
            except Exception as e:
                # Top-level boundary: report the failure to the client and
                # leave a trace on stderr, since log_message is silenced.
                print(f"tools/call {tool_name!r} failed: {e!r}", file=sys.stderr)
                self._send_rpc_error(req_id, _JSONRPC_SERVER_ERROR, str(e))
                return

            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "content": [
                        {"type": "text", "text": json.dumps(result, ensure_ascii=False)}
                    ]
                }
            })
            return

        # Messages without an "id" are notifications (for example
        # "notifications/initialized"); they expect no JSON-RPC response,
        # only an HTTP acknowledgement.
        if "id" not in req:
            self._send_accepted()
            return

        # Unknown method
        self._send_rpc_error(req_id, _JSONRPC_METHOD_NOT_FOUND, "Unknown method: " + str(method))

    def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
        """Dispatch a ``tools/call`` request to the named tool.

        Raises:
            ValueError: For an unknown tool, non-object arguments, or a
                missing/empty ``text`` argument.
        """
        if not isinstance(args, dict):
            raise ValueError("Tool arguments must be an object")

        if name == "summarize_document":
            text = args.get("text")
            if not text:
                raise ValueError("Text parameter is required")

            max_length = args.get("max_length", MAX_DIRECT_SUMMARY_LENGTH)
            return summarize_document(text, max_length)

        raise ValueError(f"Unknown tool: {name}")


def main():
    """Start the MCP summary server and serve until interrupted."""
    server = HTTPServer(("0.0.0.0", PORT), MCPSummaryHandler)
    mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
    print(f"MCP Summary Server listening on 0.0.0.0:{PORT} [{mode}]")
    print(f" - Model: {MODEL_NAME}")
    print(f" - Chunk size: {CHUNK_SIZE} characters")
    print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        # Release the listening socket however serve_forever() exits.
        server.server_close()


if __name__ == "__main__":
    main()