Initial commit: MCP Summary Server

2026-06-13 22:36:29 +00:00
commit dbddfcd61d
5 changed files with 575 additions and 0 deletions
@@ -0,0 +1,29 @@
 # MCP Summary Server - Environment Variables
 # Server Configuration
 PORT=8080
 # Authentication (optional)
 # If set, requests must include: Authorization: Bearer <API_KEY>
 API_KEY=
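 # LLM Configuration
 # Base URL of the OpenAI-compatible API (the server appends /chat/completions).
 # NOTE: the default below uses the same port as PORT above; point it at your
 # actual LLM endpoint, otherwise the server would send LLM requests to itself.
 OPENAPI_URL=http://localhost:8080/v1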
 OPENAPI_API_KEY=
 MODEL_NAME=gpt-4o
 # Summarization Configuration
 # Characters per chunk when splitting long text
 CHUNK_SIZE=4000
 # Characters of overlap between chunks to maintain context
 OVERLAP=200
 # Target length for intermediate chunk summaries (words)
 TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
 # Maximum length for final synthesized summary (words)
 MAX_DIRECT_SUMMARY_LENGTH=100
 # Maximum text length (characters) before chunking is triggered
 MAX_DIRECT_TEXT_LENGTH=8000
@@ -0,0 +1,36 @@
 # Dockerfile for MCP Summary Server
 #
 # Usage (from the directory containing this Dockerfile, requirements.txt and mcp_summary_server.py):
 #
 #   docker build -t mcp-summary .
 #   docker run -p 8080:8080 --env-file .env mcp-summary
 #
 FROM python:3.12-slim
 WORKDIR /app
 # Install runtime dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
 # Copy the server script
 COPY mcp_summary_server.py /app/mcp_summary_server.py
 # Expose HTTP port
 EXPOSE 8080
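 # Environment variables (defaults; override at run time with -e or --env-file)
 # NOTE: inside the container, localhost refers to the container itself, so
 # OPENAPI_URL must be overridden to reach an LLM endpoint running elsewhere.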
 ENV PORT=8080
 ENV OPENAPI_URL=http://localhost:8080/v1
 ENV OPENAPI_API_KEY=""
 ENV MODEL_NAME=gpt-4o
 ENV CHUNK_SIZE=4000
 ENV OVERLAP=200
 ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
 ENV MAX_DIRECT_SUMMARY_LENGTH=100
 ENV MAX_DIRECT_TEXT_LENGTH=8000
 ENV API_KEY=""
 # Start the MCP summary server
 ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
@@ -0,0 +1,80 @@
 # MCP Summary Server
 An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
 ## Features
 - Automatically determines whether to summarize directly or use chunked summarization
 - All processing happens server-side
 - Returns only the summary to the client
 - Configurable chunking parameters
 - Bearer token authentication (optional)
 ## Setup
 ### Environment Variables
 Copy `.env.example` to `.env` and configure:
 ```bash
 cp .env.example .env
 ```
 | Variable | Default | Description |
 |----------|---------|-------------|
 | PORT | 8080 | HTTP server port |
 | API_KEY | (empty) | Bearer token for authentication (authentication is disabled when empty) |
 | OPENAPI_URL | http://localhost:8080/v1 | Base URL of the OpenAI-compatible LLM API |
 | OPENAPI_API_KEY | (empty) | LLM API key |
 | MODEL_NAME | gpt-4o | LLM model to use |
 | CHUNK_SIZE | 4000 | Characters per chunk |
 | OVERLAP | 200 | Characters of overlap between chunks |
 | TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
 | MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length (words) |
 | MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length (characters) before chunking |
 ## Running
 ### Docker
 ```bash
 # Build
 docker build -t mcp-summary .
 # Run with environment file
 docker run -p 8080:8080 --env-file .env mcp-summary
 # Run with inline environment variables
 docker run -p 8080:8080 \
  -e OPENAPI_URL=http://localhost:8080/v1 \
  -e OPENAPI_API_KEY=your-key \
  -e MODEL_NAME=gpt-4o \
  mcp-summary
 ```
 ### Python
 ```bash
 pip install -r requirements.txt
 python mcp_summary_server.py
 ```
 ## MCP Tool
 ### summarize_document
 Summarizes a document, automatically handling chunking for long text.
 **Parameters:**
 - `text` (string, required): The document text to summarize
 - `max_length` (integer, optional): Maximum summary length in words (default: 100)
 **Returns:**
 ```json
 {
  "summary": "The summarized text...",
  "original_length": 12345,
  "method": "direct",
  "chunks": 1
 }
 ```
 `method` is either `"direct"` or `"chunked"`; `chunks` is the number of chunks used.
@@ -0,0 +1,426 @@
 #!/usr/bin/env python3
 """
 MCP Summary Server (Streamable HTTP transport)
 Designed to work with OpenWebUI's MCP (Streamable HTTP) integration.
 Summarizes documents by:
 1. Checking text length
 2. If short, summarizing directly with LLM
 3. If long, chunking text, summarizing each chunk, then synthesizing
 All processing happens server-side, keeping full text out of the chat context window.
 Tools:
 - summarize_document: Summarize a document (handles chunking automatically)
 Auth:
 - If API_KEY is set:
  - Requires: Authorization: Bearer <API_KEY>
 - If API_KEY is not set:
  - No auth required (for local/internal use).
 """
 import hmac
 import json
 import os
 import re
 import sys
 from http.server import HTTPServer, BaseHTTPRequestHandler
 from typing import Any, Dict, List, Optional
 import requests
 # MCP Server Configuration
 def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.
    A variable that is unset or blank (e.g. ``CHUNK_SIZE=`` in an env file)
    falls back to ``default`` instead of crashing on ``int("")``.
    Args:
        name: Environment variable name.
        default: Value used when the variable is unset or blank.
    Returns:
        The parsed integer, or ``default``.
    Raises:
        ValueError: If the variable is set to a non-integer value; the
            message names the offending variable.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None
 API_KEY = os.environ.get("API_KEY", "").strip()
 PORT = _env_int("PORT", 8080)
 # LLM Configuration
 OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
 OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
 MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
 # Summarization Configuration
 CHUNK_SIZE = _env_int("CHUNK_SIZE", 4000)  # Characters per chunk
 OVERLAP = _env_int("OVERLAP", 200)  # Characters of overlap between chunks
 TARGET_INTERMEDIATE_SUMMARY_LENGTH = _env_int("TARGET_INTERMEDIATE_SUMMARY_LENGTH", 150)  # Words
 MAX_DIRECT_SUMMARY_LENGTH = _env_int("MAX_DIRECT_SUMMARY_LENGTH", 100)  # Words for final summary
 MAX_DIRECT_TEXT_LENGTH = _env_int("MAX_DIRECT_TEXT_LENGTH", 8000)  # Characters before chunking
 # Tool definitions
 def _summarize_tool_spec() -> Dict[str, Any]:
    """Build the descriptor advertised for the summarize_document tool."""
    text_property = {
        "type": "string",
        "description": "The document text to summarize"
    }
    max_length_property = {
        "type": "integer",
        "description": f"Maximum length of summary in words (default: {MAX_DIRECT_SUMMARY_LENGTH})"
    }
    input_schema = {
        "type": "object",
        "properties": {
            "text": text_property,
            "max_length": max_length_property
        },
        "required": ["text"]
    }
    return {
        "name": "summarize_document",
        "description": "Summarize a document. Automatically handles chunking for long text. Returns a concise summary without exposing the full text.",
        "inputSchema": input_schema
    }
 # Payload returned verbatim as the result of the MCP "tools/list" method.
 TOOLS_LIST: Dict[str, Any] = {"tools": [_summarize_tool_spec()]}
 def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
    """Send a chat-completion request to the configured OpenAI-compatible API.
    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        temperature: Sampling temperature forwarded to the model.
    Returns:
        The text content of the first completion choice.
    Raises:
        requests.RequestException: On network failure, timeout (60 s) or a
            non-2xx HTTP status.
        RuntimeError: If the response body does not have the expected
            ``choices[0].message.content`` string.
    """
    # Tolerate a trailing slash in the configured base URL.
    url = f"{OPENAPI_URL.rstrip('/')}/chat/completions"
    headers = {"Content-Type": "application/json"}
    # Only send credentials when a key is configured; an empty key would
    # otherwise produce a bare "Bearer " header.
    api_key = OPENAPI_API_KEY.strip()
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": 2000,
        "top_p": 0.9
    }
    response = requests.post(url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    data = response.json()
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError(
            "LLM response did not contain choices[0].message.content"
        ) from exc
    if not isinstance(content, str):
        raise RuntimeError("LLM response content was not text")
    return content
 def chunk_text(text: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None) -> List[str]:
    """Split text into overlapping chunks for summarization.
    Each chunk is at most ``chunk_size`` characters. Within the second half
    of each window the split prefers, in order: a paragraph break, a line
    break, then the end of the last sentence (the terminator stays with the
    chunk it ends). Once the remaining text fits in a single chunk it is
    emitted whole.
    Args:
        text: The text to split.
        chunk_size: Maximum characters per chunk; defaults to CHUNK_SIZE.
        overlap: Characters repeated at the start of the next chunk;
            defaults to OVERLAP. Negative values are treated as 0.
    Returns:
        The non-blank chunks in document order. Text that already fits in
        one chunk is returned unchanged as a single-element list.
    Raises:
        ValueError: If the effective chunk size is not positive.
    """
    size = CHUNK_SIZE if chunk_size is None else chunk_size
    lap = OVERLAP if overlap is None else overlap
    if size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    lap = max(lap, 0)
    total = len(text)
    if total <= size:
        return [text]
    chunks: List[str] = []
    start = 0
    while start < total:
        end = start + size
        if end >= total:
            # The remainder fits in one chunk: emit it whole and stop.
            tail = text[start:]
            if tail.strip():
                chunks.append(tail)
            break
        # Only look for a natural break in the second half of the window so
        # chunks do not become needlessly small.
        floor = start + size // 2
        break_point = -1
        for marker in ("\n\n", "\n"):
            pos = text.rfind(marker, floor, end)
            if pos != -1:
                break_point = pos
                break
        if break_point == -1:
            # Latest sentence terminator of any kind; +1 keeps it in this chunk.
            last_stop = max(text.rfind(mark, floor, end) for mark in ".!?")
            if last_stop != -1:
                break_point = last_stop + 1
        if break_point <= start:
            # No usable break found: hard-split at the window end.
            break_point = end
        piece = text[start:break_point]
        if piece.strip():
            chunks.append(piece)
        # Step back by the overlap, but always move forward so a large
        # overlap relative to the chunk size cannot loop forever.
        next_start = break_point - lap
        start = next_start if next_start > start else break_point
    return chunks
 def summarize_chunk(chunk: str, chunk_num: int, total_chunks: int) -> str:
    """Summarize a single chunk of a larger document.
    Args:
        chunk: The chunk text to summarize.
        chunk_num: 1-based position of this chunk, used in the prompt.
        total_chunks: Total number of chunks, used in the prompt.
    Returns:
        The summary text returned by call_llm.
    """
    system_prompt = f"""You are a precise legal assistant specializing in creating concise, accurate summaries.
 You are processing chunk {chunk_num} of {total_chunks} from a larger document.
 Your task: Create a focused summary of this chunk that:
 - Captures the key points and important details
 - Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
 - Can be combined with summaries of other chunks to form a complete picture
 - Uses clear, professional language
 - Preserves important names, dates, and specific facts
 Format your response as plain text without bullet points or special formatting."""
    # Interpolate the chunk parameter; there is no variable named "text" in
    # this scope, so referencing it raised NameError on every call.
    user_prompt = f"""Summarize the following text (chunk {chunk_num} of {total_chunks}):
 {chunk}
 Summary:"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    return call_llm(messages)
 def synthesize_summaries(chunk_summaries: List[str], max_length: Optional[int] = None) -> str:
    """Synthesize multiple chunk summaries into a single final summary.
    Args:
        chunk_summaries: Per-chunk summaries in document order.
        max_length: Target length of the final summary in words; defaults
            to MAX_DIRECT_SUMMARY_LENGTH when omitted.
    Returns:
        The synthesized summary text returned by call_llm.
    """
    target_words = MAX_DIRECT_SUMMARY_LENGTH if max_length is None else max_length
    combined = "\n\n".join(chunk_summaries)
    system_prompt = f"""You are a precise legal assistant creating executive-level summaries.
 Your task: Synthesize the provided partial summaries into a single, cohesive summary that:
 - Is approximately {target_words} words
 - Captures the complete picture of the document
 - Is clear and professional
 - Removes redundancy
 - Maintains logical flow
 - Preserves all critical information
 Format your response as a single paragraph of plain text."""
    user_prompt = f"""Synthesize the following partial summaries into one cohesive summary:
 {combined}
 Final summary:"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    return call_llm(messages)
 def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -> Dict[str, Any]:
    """Summarize a document, chunking it first when it is too long.
    Text of at most MAX_DIRECT_TEXT_LENGTH characters (after stripping) is
    summarized in one LLM call. Longer text is split with chunk_text, each
    chunk is summarized, and the partial summaries are synthesized into a
    final summary that honors ``max_length``.
    Args:
        text: The document text.
        max_length: Target length of the final summary in words.
    Returns:
        A dict with keys ``summary``, ``original_length`` (characters before
        stripping), ``method`` ("direct" or "chunked") and ``chunks``.
    Raises:
        ValueError: If ``text`` is not a string or is empty after stripping.
    """
    if not isinstance(text, str):
        raise ValueError("Text must be a string")
    original_length = len(text)
    # Strip whitespace and validate
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")
    # Direct summarization for shorter texts
    if len(text) <= MAX_DIRECT_TEXT_LENGTH:
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
 Your task: Create a summary that:
 - Is approximately {max_length} words
 - Captures the key points and important details
 - Uses clear, professional language
 - Preserves important names, dates, and specific facts
 - Is suitable for a legal professional
 Format your response as plain text without bullet points or special formatting."""
        user_prompt = f"""Summarize the following document:
 {text}
 Summary:"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        summary = call_llm(messages)
        return {
            "summary": summary,
            "original_length": original_length,
            "method": "direct",
            "chunks": 1
        }
    # Chunked summarization for longer texts
    chunks = chunk_text(text)
    chunk_summaries = [
        summarize_chunk(chunk, index, len(chunks))
        for index, chunk in enumerate(chunks, 1)
    ]
    # Pass the requested length through so the chunked path honors it too
    # instead of always asking for a fixed 100 words.
    final_summary = synthesize_summaries(chunk_summaries, max_length)
    return {
        "summary": final_summary,
        "original_length": original_length,
        "method": "chunked",
        "chunks": len(chunks)
    }
 class MCPSummaryHandler(BaseHTTPRequestHandler):
    """HTTP handler exposing the summarizer as a minimal MCP JSON-RPC endpoint.
    GET / returns a small service description. POST / and POST /mcp accept
    the JSON-RPC methods ``initialize``, ``tools/list`` and ``tools/call``.
    When API_KEY is set, POST requests must carry
    ``Authorization: Bearer <API_KEY>``.
    """
    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        # Quiet logs by default
        pass
    def _send_json(self, status: int, payload: Any):
        """Serialize ``payload`` as UTF-8 JSON and send it with ``status``."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
    def _auth_or_401(self) -> bool:
        """Check the bearer token when API_KEY is configured.
        Returns:
            True when the request may proceed. Otherwise a 401 response has
            already been sent and False is returned.
        """
        if not API_KEY:
            return True
        auth_header = self.headers.get("Authorization", "")
        if not auth_header.startswith("Bearer "):
            self._send_json(401, {"error": "Missing or invalid API key"})
            return False
        token = auth_header[len("Bearer "):].strip()
        # Constant-time comparison so response timing does not leak how much
        # of the token matched; bytes are used so non-ASCII input is accepted.
        if not hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
            self._send_json(401, {"error": "Invalid API key"})
            return False
        return True
    def do_GET(self):
        """Handle GET requests (health check)."""
        if self.path == "/":
            self._send_json(200, {
                "service": "mcp-summary",
                "transport": "streamable-http",
                "model": MODEL_NAME,
                "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
            })
            return
        self.send_error(404, "Not Found")
    def do_POST(self):
        """Handle MCP JSON-RPC requests."""
        if self.path not in ("/", "/mcp"):
            self.send_error(404, "Not Found")
            return
        if not self._auth_or_401():
            return
        # Parse request
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            self._send_json(400, {"error": "Invalid Content-Length"})
            return
        # A negative length would make rfile.read() block until EOF.
        if length <= 0:
            self._send_json(400, {"error": "Empty body"})
            return
        raw = self.rfile.read(length)
        try:
            req = json.loads(raw)
        except ValueError:
            # Covers JSONDecodeError and the UnicodeDecodeError raised for
            # bodies that are not valid UTF-8.
            self._send_json(400, {"error": "Invalid JSON"})
            return
        if not isinstance(req, dict):
            self._send_json(400, {"error": "Request must be a JSON object"})
            return
        method = req.get("method")
        params = req.get("params")
        if not isinstance(params, dict):
            params = {}
        req_id = req.get("id")
        # MCP notifications (e.g. notifications/initialized) expect no
        # JSON-RPC reply; acknowledge them with 202 and an empty body.
        if isinstance(method, str) and method.startswith("notifications/"):
            self.send_response(202)
            self.send_header("Content-Length", "0")
            self.end_headers()
            return
        # MCP: initialize
        if method == "initialize":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "protocolVersion": "2025-11-25",
                    "capabilities": {
                        "tools": {}
                    },
                    "serverInfo": {
                        "name": "mcp-summary",
                        "version": "1.0.0"
                    }
                }
            })
            return
        # MCP: tools/list
        if method == "tools/list":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": TOOLS_LIST
            })
            return
        # MCP: tools/call
        if method == "tools/call":
            tool_name = params.get("name")
            tool_args = params.get("arguments")
            if not isinstance(tool_args, dict):
                tool_args = {}
            try:
                result = self._call_tool(tool_name, tool_args)
            except Exception as e:
                # Boundary handler: report any tool failure to the client as
                # a JSON-RPC error instead of dropping the connection.
                self._send_json(200, {
                    "jsonrpc": "2.0",
                    "id": req_id,
                    "error": {
                        "code": -32000,
                        "message": str(e)
                    }
                })
                return
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {
                    "content": [
                        {"type": "text", "text": json.dumps(result, ensure_ascii=False)}
                    ]
                }
            })
            return
        # Unknown method
        self._send_json(400, {"error": "Unknown method: " + str(method)})
    def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
        """Validate the arguments and execute a tool call.
        Args:
            name: Tool name; only "summarize_document" is supported.
            args: Tool arguments: ``text`` (required string) and optional
                ``max_length`` (positive integer, or a value convertible
                to one).
        Returns:
            The dict produced by summarize_document.
        Raises:
            ValueError: For an unknown tool or invalid arguments.
        """
        if name != "summarize_document":
            raise ValueError(f"Unknown tool: {name}")
        text = args.get("text")
        if not text:
            raise ValueError("Text parameter is required")
        if not isinstance(text, str):
            raise ValueError("Text parameter must be a string")
        max_length = args.get("max_length")
        if max_length is None:
            # Missing or explicit null both mean "use the default".
            max_length = MAX_DIRECT_SUMMARY_LENGTH
        else:
            # bool is a subclass of int; reject it explicitly.
            if isinstance(max_length, bool):
                raise ValueError("max_length must be a positive integer")
            try:
                max_length = int(max_length)
            except (TypeError, ValueError, OverflowError):
                raise ValueError("max_length must be a positive integer") from None
            if max_length <= 0:
                raise ValueError("max_length must be a positive integer")
        return summarize_document(text, max_length)
 def main():
    """Create the HTTP server, print a startup banner and serve until Ctrl-C."""
    bind_address = ("0.0.0.0", PORT)
    httpd = HTTPServer(bind_address, MCPSummaryHandler)
    if API_KEY:
        auth_mode = "auth enabled (Bearer)"
    else:
        auth_mode = "no auth (API_KEY not set)"
    banner_lines = [
        f"MCP Summary Server listening on 0.0.0.0:{PORT} [{auth_mode}]",
        f"  - Model: {MODEL_NAME}",
        f"  - Chunk size: {CHUNK_SIZE} characters",
        f"  - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters",
    ]
    for banner_line in banner_lines:
        print(banner_line)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C: announce the shutdown and release the listening socket.
        print("\nShutting down...")
        httpd.server_close()
 if __name__ == "__main__":
    main()
@@ -0,0 +1,4 @@
 # requirements.txt for MCP Summary Server
 # HTTP requests for LLM communication
 requests>=2.31.0