Fix: Correct authentication and add ping method

Initial commit: MCP Summary Server
2026-06-14 06:08:38 +00:00 · 2026-06-14 05:57:46 +00:00
7 changed files with 173 additions and 355 deletions
@@ -1,32 +0,0 @@
 # MCP Summary Server - Environment Variables
 # Server Configuration
 PORT=8080
 # Authentication (optional)
 # If set, requests must include: Authorization: Bearer <API_KEY>
 API_KEY=
 # LLM Configuration
 OPENAPI_URL=http://localhost:8080/v1
 OPENAPI_API_KEY=
 MODEL_NAME=gpt-4o
 # LLM Call Timeout in seconds (increase for large documents)
 LLM_TIMEOUT=120
 # Summarization Configuration
 # Characters per chunk when splitting long text
 CHUNK_SIZE=4000
 # Characters of overlap between chunks to maintain context
 OVERLAP=200
 # Target length for intermediate chunk summaries (words)
 TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
 # Maximum length for final synthesized summary (words)
 MAX_DIRECT_SUMMARY_LENGTH=100
 # Maximum text length (characters) before chunking is triggered
 MAX_DIRECT_TEXT_LENGTH=8000
@@ -1,37 +0,0 @@
 # Dockerfile for MCP Summary Server
 #
 # Usage (from directory containing this Dockerfile and mcp_summary_server.py):
 #
 #   docker build -t mcp-summary .
 #   docker run -p 8080:8080 --env-file .env mcp-summary
 #
 FROM python:3.12-slim
 WORKDIR /app
 # Install runtime dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
 # Copy the server script
 COPY mcp_summary_server.py /app/mcp_summary_server.py
 # Expose HTTP port
 EXPOSE 8080
 # Environment variables
 ENV PORT=8080
 ENV OPENAPI_URL=http://localhost:8080/v1
 ENV OPENAPI_API_KEY=
 ENV MODEL_NAME=gpt-4o
 ENV CHUNK_SIZE=4000
 ENV OVERLAP=200
 ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
 ENV MAX_DIRECT_SUMMARY_LENGTH=100
 ENV MAX_DIRECT_TEXT_LENGTH=8000
 ENV LLM_TIMEOUT=120
 ENV API_KEY=
 # Start the MCP summary server
 ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
@@ -1,137 +0,0 @@
 # MCP Summary Server
 An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
 ## Features
 - Automatically determines whether to summarize directly or use chunked summarization
 - All processing happens server-side
 - Returns only the summary to the client
 - Configurable chunking parameters
 - Bearer token authentication (optional)
 ## Setup
 ### Environment Variables
 Copy `.env.example` to `.env` and configure:
 ```bash
 cp .env.example .env
 ```
 | Variable | Default | Description |
 |----------|---------|-------------|
 | PORT | 8080 | HTTP server port |
 | API_KEY | (empty) | Bearer token for authentication |
 | OPENAPI_URL | http://localhost:8080/v1 | Base URL of the LLM API; the server appends `/chat/completions` |
 | OPENAPI_API_KEY | (empty) | LLM API key |
 | MODEL_NAME | gpt-4o | LLM model to use |
 | LLM_TIMEOUT | 120 | LLM call timeout in seconds |
 | CHUNK_SIZE | 4000 | Characters per chunk |
 | OVERLAP | 200 | Characters of overlap between chunks |
 | TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
 | MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length, in words |
 | MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length before chunking |
 ## Running
 ### Docker
 ```bash
 # Build
 docker build -t mcp-summary .
 # Run with environment file
 docker run -p 8080:8080 --env-file .env mcp-summary
 # Run with inline environment variables
 docker run -p 8080:8080 \
  -e OPENAPI_URL=http://localhost:8080/v1 \
  -e OPENAPI_API_KEY=your-key \
  -e MODEL_NAME=gpt-4o \
  mcp-summary
 ```
 ### Python
 ```bash
 pip install -r requirements.txt
 python mcp_summary_server.py
 ```
 ## Connecting to OpenWebUI
 ### In OpenWebUI Admin Settings
 1. Go to **Admin Settings → External Tools**
 2. Click **+ (Add Server)**
 3. Set **Type** to **MCP (Streamable HTTP)**
 4. Enter your **Server URL**
 5. Set **Authentication**:
   - **None** if no API key is configured
   - **Bearer** if API_KEY is set (provide the key)
 6. Save
 ### Docker Networking
 If running both OpenWebUI and MCP Summary in Docker:
 ```bash
 # Use host.docker.internal to reach host machine
 docker run -p 8080:8080 \
  -e OPENAPI_URL=http://host.docker.internal:3000/v1 \
  -e OPENAPI_API_KEY=your-key \
  mcp-summary
 ```
 If both containers are on the same Docker network, use the container name directly:
 ```bash
 docker run --network mynetwork -p 8080:8080 \
  -e OPENAPI_URL=http://openwebui-container:8080/v1 \
  -e OPENAPI_API_KEY=your-key \
  mcp-summary
 ```
 ## MCP Tool
 ### summarize_document
 Summarizes a document, automatically handling chunking for long text.
 **Parameters:**
 - `text` (string, required): The document text to summarize
 - `max_length` (integer, optional): Maximum summary length in words (default: 100)
 **Returns:**
 ```json
 {
  "summary": "The summarized text...",
  "original_length": 12345,
  "method": "direct",
  "chunks": 1
 }
 ```
 `method` is either `"direct"` or `"chunked"`; `chunks` is the number of chunks used.
 ## Troubleshooting
 ### "Failed to connect to MCP server"
 1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed.
 2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL
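 3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM at OPENAPI_URL. The default (`http://localhost:8080/v1`) uses the same port as this server's own default `PORT`, and inside a container `localhost` refers to the container itself, so set OPENAPI_URL explicitly.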
 4. **Check timeouts**: Increase LLM_TIMEOUT if summarization takes too long
 ### Infinite loading screen
 This may occur if you configured the server as OpenAPI instead of MCP. Fix by:
 1. Opening Admin Settings → External Tools
 2. Disabling/deleting the problematic connection
 3. Re-adding with **Type** set to **MCP (Streamable HTTP)**
 ### Slow initialization
 If the server takes longer than 10 seconds to initialize:
 - Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds)
@@ -1,34 +0,0 @@
 #!/bin/bash
 # Diagnostic script for MCP Summary Server
 echo "================================"
 echo "MCP Summary Server Diagnostics"
 echo "================================"
 # Check if server is running
 echo -e "\n1. Checking if server process is running..."
 ps aux | grep mcp_summary_server || echo "Server process not found"
 # Check if port is listening
 echo -e "\n2. Checking if port is listening..."
 netstat -tlnp 2>/dev/null | grep 8080 || echo "Port 8080 not listening"
 # Test basic connectivity
 echo -e "\n3. Testing basic connectivity..."
 curl -s http://localhost:8080/ || echo "Cannot connect to localhost:8080"
 # Test MCP initialize
 echo -e "\n4. Testing MCP initialize..."
 curl -s -X POST http://localhost:8080/ \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-11-25","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}' | jq .
 # Test tools list
 echo -e "\n5. Testing tools list..."
 curl -s -X POST http://localhost:8080/ \
  -H "Content-Type: application/json" \
  -d '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}' | jq .
 echo -e "\n================================"
 echo "Diagnostics complete"
 echo "================================"
@@ -24,11 +24,32 @@ Auth:
 import json
 import os
 import sys
 import logging
 from http.server import HTTPServer, BaseHTTPRequestHandler
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 import requests
 from requests.exceptions import RequestException
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger("mcp-summary")
 # MCP Server Configuration
 API_KEY = os.environ.get("API_KEY", "").strip()
 PORT = int(os.environ.get("PORT", "8080"))
 # LLM Configuration
 OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
 OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
 MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
 # Summarization Configuration
 CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
 OVERLAP = int(os.environ.get("OVERLAP", "200"))
 TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
 MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))
 MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
 LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
 # Tool definitions
 TOOLS_LIST: Dict[str, Any] = {
@@ -64,8 +85,7 @@ def get_bearer_token(headers: Any) -> Optional[str]:
 def require_auth(headers: Any) -> bool:
-    """Check authentication if API key is configured."""
+    """Check authentication. Returns True if auth passes or is not required."""
    # If API_KEY is not set, allow unauthenticated access
    if not API_KEY:
        return True
@@ -75,55 +95,52 @@ def require_auth(headers: Any) -> bool:
    return True
-def call_llm(text: str, system_prompt: str, max_tokens: int = 2000) -> str:
+def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
-    """Make an OpenAPI-compatible LLM call."""
+    """Make an OpenAPI-compatible LLM call with error handling."""
-    openapi_url = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
+    url = f"{OPENAPI_URL}/chat/completions"
    openapi_api_key = os.environ.get("OPENAPI_API_KEY", "")
    model_name = os.environ.get("MODEL_NAME", "gpt-4o")
    timeout = int(os.environ.get("LLM_TIMEOUT", "120"))
    url = f"{openapi_url}/chat/completions"
    headers = {
        "Content-Type": "application/json",
-        "Authorization": f"Bearer {openapi_api_key}"
+        "Authorization": f"Bearer {OPENAPI_API_KEY}"
    }
    payload = {
-        "model": model_name,
+        "model": MODEL_NAME,
-        "messages": [
+        "messages": messages,
-            {"role": "system", "content": system_prompt},
+        "temperature": temperature,
-            {"role": "user", "content": text}
+        "max_tokens": 2000,
        ],
        "temperature": 0.3,
        "max_tokens": max_tokens,
        "top_p": 0.9
    }
-    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
+    try:
-    response.raise_for_status()
+        logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}")
        response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT)
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]
-    data = response.json()
+    except RequestException as e:
-    return data["choices"][0]["message"]["content"]
+        logger.error(f"LLM request failed: {e}")
        raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}")
    except Exception as e:
        logger.error(f"LLM call failed: {e}")
        raise RuntimeError(f"LLM call failed: {str(e)}")
-def chunk_text(text: str) -> list:
+def chunk_text(text: str) -> List[str]:
    """Split text into chunks with overlap for summarization."""
-    chunk_size = int(os.environ.get("CHUNK_SIZE", "4000"))
+    if len(text) <= CHUNK_SIZE:
    overlap = int(os.environ.get("OVERLAP", "200"))
    if len(text) <= chunk_size:
        return [text]
    chunks = []
    start = 0
    while start < len(text):
-        end = min(start + chunk_size, len(text))
+        end = min(start + CHUNK_SIZE, len(text))
        # Try to break at sentence/paragraph boundary
        break_point = end
        for marker in ["\n\n", "\n", ". ", "! ", "? "]:
-            pos = text.rfind(marker, start + chunk_size // 2, end)
+            pos = text.rfind(marker, start + CHUNK_SIZE // 2, end)
            if pos > start:
                break_point = pos
                break
@@ -132,84 +149,46 @@ def chunk_text(text: str) -> list:
        if chunk.strip():
            chunks.append(chunk)
-        start = break_point - overlap if break_point < len(text) else len(text)
+        start = break_point - OVERLAP if break_point < len(text) else len(text)
        if start >= len(text):
            break
    logger.info(f"Split text into {len(chunks)} chunks")
    return chunks
-def summarize_document(text: str, max_length: int = 100) -> dict:
+def summarize_chunk(chunk_text: str, chunk_num: int, total_chunks: int) -> str:
-    """
+    """Summarize a single chunk of text."""
-    Main summarization function.
+    system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
    - If text is short, summarize directly
    - If text is long, chunk and summarize each chunk, then synthesize
    """
    original_length = len(text)
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")
    max_direct_length = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
    intermediate_length = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
    # Direct summarization for shorter texts
    if len(text) <= max_direct_length:
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
-Create a summary that:
+You are processing chunk {chunk_num} of {total_chunks} from a larger document.
 - Is approximately {max_length} words
 - Captures key points and important details
 - Uses clear, professional language
 - Preserves names, dates, and specific facts
 Format as plain text without bullet points."""
        user_prompt = f"""Summarize the following document:
 {text}
 Summary:"""
        summary = call_llm(user_prompt, system_prompt)
        return {
            "summary": summary,
            "original_length": original_length,
            "method": "direct",
            "chunks": 1
        }
    # Chunked summarization for longer texts
    chunks = chunk_text(text)
    chunk_summaries = []
    for i, chunk in enumerate(chunks, 1):
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
 You are processing chunk {i} of {len(chunks)} from a larger document.
 Create a focused summary that:
 - Captures key points and important details
- Is approximately {intermediate_length} words
+- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
 - Can be combined with other chunk summaries
 - Uses clear, professional language
 - Preserves names, dates, and specific facts
 Respond as plain text without bullet points."""
-        
+    
-        user_prompt = f"""Summarize this text (chunk {i} of {len(chunks)}):
+    user_prompt = f"""Summarize this text (chunk {chunk_num} of {total_chunks}):
-{chunk}
+{chunk_text}
 Summary:"""
        chunk_summary = call_llm(user_prompt, system_prompt)
        chunk_summaries.append(chunk_summary)
-    # Synthesize into final summary
+    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    logger.info(f"Summarizing chunk {chunk_num}/{total_chunks}")
    return call_llm(messages)
 def synthesize_summaries(chunk_summaries: List[str]) -> str:
    """Synthesize multiple chunk summaries into a single final summary."""
    combined = "\n\n".join(chunk_summaries)
    system_prompt = """You are a precise legal assistant creating executive-level summaries.
@@ -230,7 +209,71 @@ Format as a single paragraph of plain text."""
 Final summary:"""
-    final_summary = call_llm(user_prompt, system_prompt)
+    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    logger.info(f"Synthesizing {len(chunk_summaries)} chunk summaries")
    return call_llm(messages)
 def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -> Dict[str, Any]:
    """
    Main summarization function.
    - If text is short, summarize directly
    - If text is long, chunk and summarize each chunk, then synthesize
    """
    original_length = len(text)
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")
    logger.info(f"Summarizing text of {original_length} characters")
    # Direct summarization for shorter texts
    if len(text) <= MAX_DIRECT_TEXT_LENGTH:
        system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
 Create a summary that:
 - Is approximately {max_length} words
 - Captures key points and important details
 - Uses clear, professional language
 - Preserves names, dates, and specific facts
 Format as plain text without bullet points."""
        user_prompt = f"""Summarize the following document:
 {text}
 Summary:"""
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
        summary = call_llm(messages)
        return {
            "summary": summary,
            "original_length": original_length,
            "method": "direct",
            "chunks": 1
        }
    # Chunked summarization for longer texts
    chunks = chunk_text(text)
    chunk_summaries = []
    for i, chunk in enumerate(chunks, 1):
        chunk_summary = summarize_chunk(chunk, i, len(chunks))
        chunk_summaries.append(chunk_summary)
    final_summary = synthesize_summaries(chunk_summaries)
    return {
        "summary": final_summary,
@@ -244,9 +287,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
    """HTTP handler for MCP summary server."""
    def log_message(self, format, *args):
-        # Quiet logs by default
+        logger.info(format % args)
-        pass
+    
    def _send_json(self, status: int, payload: Any):
        """Send JSON response."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
@@ -255,52 +297,57 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
-
+    
-    def _auth_or_401(self) -> bool:
+    def _auth_or_401(self):
-        """Check authentication if API key is configured."""
+        """Check authentication. Returns False if auth fails."""
        try:
            return require_auth(self.headers)
        except PermissionError:
            self._send_json(401, {"error": "Missing or invalid API key"})
            return False
-
+    
    def do_GET(self):
        """Handle GET requests (health check)."""
        if self.path == "/":
            self._send_json(200, {
                "service": "mcp-summary",
                "transport": "streamable-http",
                "model": MODEL_NAME,
                "status": "running",
                "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
            })
            return
-
+        
        self.send_error(404, "Not Found")
-
+    
    def do_POST(self):
        """Handle MCP JSON-RPC requests."""
        # Streamable HTTP MCP endpoint
        if self.path not in ("/", "/mcp"):
            self.send_error(404, "Not Found")
            return
-
+        
        if not self._auth_or_401():
            return
-
+        
        length = int(self.headers.get("Content-Length", 0))
        if length == 0:
            self._send_json(400, {"error": "Empty body"})
            return
-
+        
        raw = self.rfile.read(length)
        try:
            req = json.loads(raw)
        except json.JSONDecodeError:
            self._send_json(400, {"error": "Invalid JSON"})
            return
-
+        
        method = req.get("method")
        params = req.get("params") or {}
        req_id = req.get("id")
-
+        
        logger.info(f"MCP request: method={method}, id={req_id}")
        # MCP: initialize
        if method == "initialize":
            self._send_json(200, {
@@ -318,7 +365,16 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
                }
            })
            return
-
+        
        # MCP: ping
        if method == "ping":
            self._send_json(200, {
                "jsonrpc": "2.0",
                "id": req_id,
                "result": {}
            })
            return
        # MCP: tools/list
        if method == "tools/list":
            self._send_json(200, {
@@ -327,7 +383,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
                "result": TOOLS_LIST
            })
            return
-
+        
        # MCP: tools/call
        if method == "tools/call":
            tool_name = params.get("name")
@@ -344,6 +400,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
                    }
                })
            except Exception as e:
                logger.error(f"Tool call failed: {e}", exc_info=True)
                self._send_json(200, {
                    "jsonrpc": "2.0",
                    "id": req_id,
@@ -353,10 +410,10 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
                    }
                })
            return
-
+        
        # Unknown method
        self._send_json(400, {"error": "Unknown method: " + str(method)})
-
+    
    def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
        """Execute a tool call."""
        if name == "summarize_document":
@@ -364,9 +421,9 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
            if not text:
                raise ValueError("Text parameter is required")
-            max_length = args.get("max_length", 100)
+            max_length = args.get("max_length", MAX_DIRECT_SUMMARY_LENGTH)
            return summarize_document(text, max_length)
-
+        
        raise ValueError(f"Unknown tool: {name}")
@@ -376,6 +433,11 @@ def main():
    server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler)
    mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
    print(f"MCP Summary Server listening on 0.0.0.0:{port} [{mode}]")
    print(f"  - Model: {MODEL_NAME}")
    print(f"  - LLM URL: {OPENAPI_URL}")
    print(f"  - Chunk size: {CHUNK_SIZE} characters")
    print(f"  - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")
    print(f"  - LLM timeout: {LLM_TIMEOUT} seconds")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
@@ -1,4 +0,0 @@
 # requirements.txt for MCP Summary Server
 # HTTP requests for LLM communication
 requests>=2.31.0
Author	SHA1	Message	Date
akadmin	511137edae	Fix: Correct authentication and add ping method	2026-06-14 06:08:38 +00:00
akadmin	491745733f	Initial commit: MCP Summary Server	2026-06-14 05:57:46 +00:00