Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 511137edae | |||
| 491745733f |
@@ -1,32 +0,0 @@
|
|||||||
# MCP Summary Server - Environment Variables
|
|
||||||
|
|
||||||
# Server Configuration
|
|
||||||
PORT=8080
|
|
||||||
|
|
||||||
# Authentication (optional)
|
|
||||||
# If set, requests must include: Authorization: Bearer <API_KEY>
|
|
||||||
API_KEY=
|
|
||||||
|
|
||||||
# LLM Configuration
|
|
||||||
OPENAPI_URL=http://localhost:8080/v1
|
|
||||||
OPENAPI_API_KEY=
|
|
||||||
MODEL_NAME=gpt-4o
|
|
||||||
|
|
||||||
# LLM Call Timeout in seconds (increase for large documents)
|
|
||||||
LLM_TIMEOUT=120
|
|
||||||
|
|
||||||
# Summarization Configuration
|
|
||||||
# Characters per chunk when splitting long text
|
|
||||||
CHUNK_SIZE=4000
|
|
||||||
|
|
||||||
# Characters of overlap between chunks to maintain context
|
|
||||||
OVERLAP=200
|
|
||||||
|
|
||||||
# Target length for intermediate chunk summaries (words)
|
|
||||||
TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
|
|
||||||
|
|
||||||
# Maximum length for final synthesized summary (words)
|
|
||||||
MAX_DIRECT_SUMMARY_LENGTH=100
|
|
||||||
|
|
||||||
# Maximum text length (characters) before chunking is triggered
|
|
||||||
MAX_DIRECT_TEXT_LENGTH=8000
|
|
||||||
-37
@@ -1,37 +0,0 @@
|
|||||||
# Dockerfile for MCP Summary Server
|
|
||||||
#
|
|
||||||
# Usage (from the directory containing this Dockerfile, requirements.txt, and mcp_summary_server.py):
|
|
||||||
#
|
|
||||||
# docker build -t mcp-summary .
|
|
||||||
# docker run -p 8080:8080 --env-file .env mcp-summary
|
|
||||||
#
|
|
||||||
|
|
||||||
FROM python:3.12-slim
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Install runtime dependencies
|
|
||||||
COPY requirements.txt .
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
|
|
||||||
|
|
||||||
# Copy the server script
|
|
||||||
COPY mcp_summary_server.py /app/mcp_summary_server.py
|
|
||||||
|
|
||||||
# Expose HTTP port
|
|
||||||
EXPOSE 8080
|
|
||||||
|
|
||||||
# Environment variables
|
|
||||||
ENV PORT=8080
|
|
||||||
ENV OPENAPI_URL=http://localhost:8080/v1
|
|
||||||
ENV OPENAPI_API_KEY=
|
|
||||||
ENV MODEL_NAME=gpt-4o
|
|
||||||
ENV CHUNK_SIZE=4000
|
|
||||||
ENV OVERLAP=200
|
|
||||||
ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
|
|
||||||
ENV MAX_DIRECT_SUMMARY_LENGTH=100
|
|
||||||
ENV MAX_DIRECT_TEXT_LENGTH=8000
|
|
||||||
ENV LLM_TIMEOUT=120
|
|
||||||
ENV API_KEY=
|
|
||||||
|
|
||||||
# Start the MCP summary server
|
|
||||||
ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
|
|
||||||
@@ -1,137 +0,0 @@
|
|||||||
# MCP Summary Server
|
|
||||||
|
|
||||||
An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
|
|
||||||
|
|
||||||
## Features
|
|
||||||
|
|
||||||
- Automatically determines whether to summarize directly or use chunked summarization
|
|
||||||
- All processing happens server-side
|
|
||||||
- Returns only the summary to the client
|
|
||||||
- Configurable chunking parameters
|
|
||||||
- Bearer token authentication (optional)
|
|
||||||
|
|
||||||
## Setup
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
|
|
||||||
Copy `.env.example` to `.env` and configure:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cp .env.example .env
|
|
||||||
```
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|----------|---------|-------------|
|
|
||||||
| PORT | 8080 | HTTP server port |
|
|
||||||
| API_KEY | (empty) | Bearer token clients must send (`Authorization: Bearer <API_KEY>`); leave empty to disable authentication |
|
|
||||||
| OPENAPI_URL | http://localhost:8080/v1 | Base URL of the LLM API (`/chat/completions` is appended to it) |
|
|
||||||
| OPENAPI_API_KEY | (empty) | LLM API key |
|
|
||||||
| MODEL_NAME | gpt-4o | LLM model to use |
|
|
||||||
| LLM_TIMEOUT | 120 | LLM call timeout in seconds |
|
|
||||||
| CHUNK_SIZE | 4000 | Characters per chunk |
|
|
||||||
| OVERLAP | 200 | Characters of overlap between chunks |
|
|
||||||
| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
|
|
||||||
| MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length (words) |
|
|
||||||
| MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length (characters) before chunking is triggered |
|
|
||||||
|
|
||||||
## Running
|
|
||||||
|
|
||||||
### Docker
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build
|
|
||||||
docker build -t mcp-summary .
|
|
||||||
|
|
||||||
# Run with environment file
|
|
||||||
docker run -p 8080:8080 --env-file .env mcp-summary
|
|
||||||
|
|
||||||
# Run with inline environment variables
|
|
||||||
docker run -p 8080:8080 \
|
|
||||||
-e OPENAPI_URL=http://localhost:8080/v1 \
|
|
||||||
-e OPENAPI_API_KEY=your-key \
|
|
||||||
-e MODEL_NAME=gpt-4o \
|
|
||||||
mcp-summary
|
|
||||||
```
|
|
||||||
|
|
||||||
### Python
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -r requirements.txt
|
|
||||||
python mcp_summary_server.py
|
|
||||||
```
|
|
||||||
|
|
||||||
## Connecting to OpenWebUI
|
|
||||||
|
|
||||||
### In OpenWebUI Admin Settings
|
|
||||||
|
|
||||||
1. Go to **Admin Settings → External Tools**
|
|
||||||
2. Click **+ (Add Server)**
|
|
||||||
3. Set **Type** to **MCP (Streamable HTTP)**
|
|
||||||
4. Enter your **Server URL**
|
|
||||||
5. Set **Authentication**:
|
|
||||||
- **None** if no API key is configured
|
|
||||||
- **Bearer** if API_KEY is set (provide the key)
|
|
||||||
6. Save
|
|
||||||
|
|
||||||
### Docker Networking
|
|
||||||
|
|
||||||
If OpenWebUI and MCP Summary both run in Docker and OpenWebUI is published on a host port (3000 in this example), reach it through the host:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Use host.docker.internal to reach host machine
|
|
||||||
docker run -p 8080:8080 \
|
|
||||||
-e OPENAPI_URL=http://host.docker.internal:3000/v1 \
|
|
||||||
-e OPENAPI_API_KEY=your-key \
|
|
||||||
mcp-summary
|
|
||||||
```
|
|
||||||
|
|
||||||
If both containers are on the same Docker network, use the container name directly:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run --network mynetwork -p 8080:8080 \
|
|
||||||
-e OPENAPI_URL=http://openwebui-container:8080/v1 \
|
|
||||||
-e OPENAPI_API_KEY=your-key \
|
|
||||||
mcp-summary
|
|
||||||
```
|
|
||||||
|
|
||||||
## MCP Tool
|
|
||||||
|
|
||||||
### summarize_document
|
|
||||||
|
|
||||||
Summarizes a document, automatically handling chunking for long text.
|
|
||||||
|
|
||||||
**Parameters:**
|
|
||||||
- `text` (string, required): The document text to summarize
|
|
||||||
- `max_length` (integer, optional): Maximum summary length in words (default: 100)
|
|
||||||
|
|
||||||
**Returns:**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"summary": "The summarized text...",
|
|
||||||
"original_length": 12345,
|
|
||||||
"method": "direct", // or "chunked"
|
|
||||||
"chunks": 1 // number of chunks used
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### "Failed to connect to MCP server"
|
|
||||||
|
|
||||||
1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed.
|
|
||||||
2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL
|
|
||||||
3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM at OPENAPI_URL
|
|
||||||
4. **Check timeouts**: Increase LLM_TIMEOUT if summarization takes too long
|
|
||||||
|
|
||||||
### Infinite loading screen
|
|
||||||
|
|
||||||
This may occur if the connection was added in OpenWebUI with **Type** set to OpenAPI instead of MCP (Streamable HTTP). To fix it:
|
|
||||||
|
|
||||||
1. Opening Admin Settings → External Tools
|
|
||||||
2. Disabling/deleting the problematic connection
|
|
||||||
3. Re-adding with **Type** set to **MCP (Streamable HTTP)**
|
|
||||||
|
|
||||||
### Slow initialization
|
|
||||||
|
|
||||||
If the server takes longer than 10 seconds to initialize:
|
|
||||||
- Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds)
|
|
||||||
Binary file not shown.
-34
@@ -1,34 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# Diagnostic script for MCP Summary Server
|
|
||||||
|
|
||||||
echo "================================"
|
|
||||||
echo "MCP Summary Server Diagnostics"
|
|
||||||
echo "================================"
|
|
||||||
|
|
||||||
# Check if server is running
|
|
||||||
echo -e "\n1. Checking if server process is running..."
|
|
||||||
ps aux | grep mcp_summary_server || echo "Server process not found"
|
|
||||||
|
|
||||||
# Check if port is listening
|
|
||||||
echo -e "\n2. Checking if port is listening..."
|
|
||||||
netstat -tlnp 2>/dev/null | grep 8080 || echo "Port 8080 not listening"
|
|
||||||
|
|
||||||
# Test basic connectivity
|
|
||||||
echo -e "\n3. Testing basic connectivity..."
|
|
||||||
curl -s http://localhost:8080/ || echo "Cannot connect to localhost:8080"
|
|
||||||
|
|
||||||
# Test MCP initialize
|
|
||||||
echo -e "\n4. Testing MCP initialize..."
|
|
||||||
curl -s -X POST http://localhost:8080/ \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-11-25","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}' | jq .
|
|
||||||
|
|
||||||
# Test tools list
|
|
||||||
echo -e "\n5. Testing tools list..."
|
|
||||||
curl -s -X POST http://localhost:8080/ \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}' | jq .
|
|
||||||
|
|
||||||
echo -e "\n================================"
|
|
||||||
echo "Diagnostics complete"
|
|
||||||
echo "================================"
|
|
||||||
+173
-111
@@ -24,11 +24,32 @@ Auth:
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import logging
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
import requests
|
import requests
|
||||||
|
from requests.exceptions import RequestException
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
logger = logging.getLogger("mcp-summary")
|
||||||
|
|
||||||
|
# MCP Server Configuration
|
||||||
API_KEY = os.environ.get("API_KEY", "").strip()
|
API_KEY = os.environ.get("API_KEY", "").strip()
|
||||||
|
PORT = int(os.environ.get("PORT", "8080"))
|
||||||
|
|
||||||
|
# LLM Configuration
|
||||||
|
OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
|
||||||
|
OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
|
||||||
|
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
|
||||||
|
|
||||||
|
# Summarization Configuration
|
||||||
|
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
|
||||||
|
OVERLAP = int(os.environ.get("OVERLAP", "200"))
|
||||||
|
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
|
||||||
|
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))
|
||||||
|
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
|
||||||
|
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
|
||||||
|
|
||||||
# Tool definitions
|
# Tool definitions
|
||||||
TOOLS_LIST: Dict[str, Any] = {
|
TOOLS_LIST: Dict[str, Any] = {
|
||||||
@@ -64,8 +85,7 @@ def get_bearer_token(headers: Any) -> Optional[str]:
|
|||||||
|
|
||||||
|
|
||||||
def require_auth(headers: Any) -> bool:
|
def require_auth(headers: Any) -> bool:
|
||||||
"""Check authentication if API key is configured."""
|
"""Check authentication. Returns True if auth passes or is not required."""
|
||||||
# If API_KEY is not set, allow unauthenticated access
|
|
||||||
if not API_KEY:
|
if not API_KEY:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -75,55 +95,52 @@ def require_auth(headers: Any) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def call_llm(text: str, system_prompt: str, max_tokens: int = 2000) -> str:
|
def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
|
||||||
"""Make an OpenAPI-compatible LLM call."""
|
"""Make an OpenAPI-compatible LLM call with error handling."""
|
||||||
openapi_url = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
|
url = f"{OPENAPI_URL}/chat/completions"
|
||||||
openapi_api_key = os.environ.get("OPENAPI_API_KEY", "")
|
|
||||||
model_name = os.environ.get("MODEL_NAME", "gpt-4o")
|
|
||||||
timeout = int(os.environ.get("LLM_TIMEOUT", "120"))
|
|
||||||
|
|
||||||
url = f"{openapi_url}/chat/completions"
|
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {openapi_api_key}"
|
"Authorization": f"Bearer {OPENAPI_API_KEY}"
|
||||||
}
|
}
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model_name,
|
"model": MODEL_NAME,
|
||||||
"messages": [
|
"messages": messages,
|
||||||
{"role": "system", "content": system_prompt},
|
"temperature": temperature,
|
||||||
{"role": "user", "content": text}
|
"max_tokens": 2000,
|
||||||
],
|
|
||||||
"temperature": 0.3,
|
|
||||||
"max_tokens": max_tokens,
|
|
||||||
"top_p": 0.9
|
"top_p": 0.9
|
||||||
}
|
}
|
||||||
|
|
||||||
response = requests.post(url, headers=headers, json=payload, timeout=timeout)
|
try:
|
||||||
response.raise_for_status()
|
logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}")
|
||||||
|
response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
return data["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
data = response.json()
|
except RequestException as e:
|
||||||
return data["choices"][0]["message"]["content"]
|
logger.error(f"LLM request failed: {e}")
|
||||||
|
raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"LLM call failed: {e}")
|
||||||
|
raise RuntimeError(f"LLM call failed: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
def chunk_text(text: str) -> list:
|
def chunk_text(text: str) -> List[str]:
|
||||||
"""Split text into chunks with overlap for summarization."""
|
"""Split text into chunks with overlap for summarization."""
|
||||||
chunk_size = int(os.environ.get("CHUNK_SIZE", "4000"))
|
if len(text) <= CHUNK_SIZE:
|
||||||
overlap = int(os.environ.get("OVERLAP", "200"))
|
|
||||||
|
|
||||||
if len(text) <= chunk_size:
|
|
||||||
return [text]
|
return [text]
|
||||||
|
|
||||||
chunks = []
|
chunks = []
|
||||||
start = 0
|
start = 0
|
||||||
|
|
||||||
while start < len(text):
|
while start < len(text):
|
||||||
end = min(start + chunk_size, len(text))
|
end = min(start + CHUNK_SIZE, len(text))
|
||||||
|
|
||||||
# Try to break at sentence/paragraph boundary
|
|
||||||
break_point = end
|
break_point = end
|
||||||
for marker in ["\n\n", "\n", ". ", "! ", "? "]:
|
for marker in ["\n\n", "\n", ". ", "! ", "? "]:
|
||||||
pos = text.rfind(marker, start + chunk_size // 2, end)
|
pos = text.rfind(marker, start + CHUNK_SIZE // 2, end)
|
||||||
if pos > start:
|
if pos > start:
|
||||||
break_point = pos
|
break_point = pos
|
||||||
break
|
break
|
||||||
@@ -132,84 +149,46 @@ def chunk_text(text: str) -> list:
|
|||||||
if chunk.strip():
|
if chunk.strip():
|
||||||
chunks.append(chunk)
|
chunks.append(chunk)
|
||||||
|
|
||||||
start = break_point - overlap if break_point < len(text) else len(text)
|
start = break_point - OVERLAP if break_point < len(text) else len(text)
|
||||||
if start >= len(text):
|
if start >= len(text):
|
||||||
break
|
break
|
||||||
|
|
||||||
|
logger.info(f"Split text into {len(chunks)} chunks")
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
def summarize_document(text: str, max_length: int = 100) -> dict:
|
def summarize_chunk(chunk_text: str, chunk_num: int, total_chunks: int) -> str:
|
||||||
"""
|
"""Summarize a single chunk of text."""
|
||||||
Main summarization function.
|
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
||||||
|
|
||||||
- If text is short, summarize directly
|
|
||||||
- If text is long, chunk and summarize each chunk, then synthesize
|
|
||||||
"""
|
|
||||||
original_length = len(text)
|
|
||||||
|
|
||||||
text = text.strip()
|
|
||||||
if not text:
|
|
||||||
raise ValueError("Empty text provided")
|
|
||||||
|
|
||||||
max_direct_length = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
|
|
||||||
intermediate_length = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
|
|
||||||
|
|
||||||
# Direct summarization for shorter texts
|
|
||||||
if len(text) <= max_direct_length:
|
|
||||||
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
|
||||||
|
|
||||||
Create a summary that:
|
You are processing chunk {chunk_num} of {total_chunks} from a larger document.
|
||||||
- Is approximately {max_length} words
|
|
||||||
- Captures key points and important details
|
|
||||||
- Uses clear, professional language
|
|
||||||
- Preserves names, dates, and specific facts
|
|
||||||
|
|
||||||
Format as plain text without bullet points."""
|
|
||||||
|
|
||||||
user_prompt = f"""Summarize the following document:
|
|
||||||
|
|
||||||
{text}
|
|
||||||
|
|
||||||
Summary:"""
|
|
||||||
|
|
||||||
summary = call_llm(user_prompt, system_prompt)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"summary": summary,
|
|
||||||
"original_length": original_length,
|
|
||||||
"method": "direct",
|
|
||||||
"chunks": 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Chunked summarization for longer texts
|
|
||||||
chunks = chunk_text(text)
|
|
||||||
|
|
||||||
chunk_summaries = []
|
|
||||||
for i, chunk in enumerate(chunks, 1):
|
|
||||||
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
|
||||||
|
|
||||||
You are processing chunk {i} of {len(chunks)} from a larger document.
|
|
||||||
|
|
||||||
Create a focused summary that:
|
Create a focused summary that:
|
||||||
- Captures key points and important details
|
- Captures key points and important details
|
||||||
- Is approximately {intermediate_length} words
|
- Is approximately {TARGET_INTERMEDIATE_SUMMARY_LENGTH} words
|
||||||
- Can be combined with other chunk summaries
|
- Can be combined with other chunk summaries
|
||||||
- Uses clear, professional language
|
- Uses clear, professional language
|
||||||
- Preserves names, dates, and specific facts
|
- Preserves names, dates, and specific facts
|
||||||
|
|
||||||
Respond as plain text without bullet points."""
|
Respond as plain text without bullet points."""
|
||||||
|
|
||||||
user_prompt = f"""Summarize this text (chunk {i} of {len(chunks)}):
|
user_prompt = f"""Summarize this text (chunk {chunk_num} of {total_chunks}):
|
||||||
|
|
||||||
{chunk}
|
{chunk_text}
|
||||||
|
|
||||||
Summary:"""
|
Summary:"""
|
||||||
|
|
||||||
chunk_summary = call_llm(user_prompt, system_prompt)
|
|
||||||
chunk_summaries.append(chunk_summary)
|
|
||||||
|
|
||||||
# Synthesize into final summary
|
messages = [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": user_prompt}
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"Summarizing chunk {chunk_num}/{total_chunks}")
|
||||||
|
return call_llm(messages)
|
||||||
|
|
||||||
|
|
||||||
|
def synthesize_summaries(chunk_summaries: List[str]) -> str:
|
||||||
|
"""Synthesize multiple chunk summaries into a single final summary."""
|
||||||
combined = "\n\n".join(chunk_summaries)
|
combined = "\n\n".join(chunk_summaries)
|
||||||
|
|
||||||
system_prompt = """You are a precise legal assistant creating executive-level summaries.
|
system_prompt = """You are a precise legal assistant creating executive-level summaries.
|
||||||
@@ -230,7 +209,71 @@ Format as a single paragraph of plain text."""
|
|||||||
|
|
||||||
Final summary:"""
|
Final summary:"""
|
||||||
|
|
||||||
final_summary = call_llm(user_prompt, system_prompt)
|
messages = [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": user_prompt}
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.info(f"Synthesizing {len(chunk_summaries)} chunk summaries")
|
||||||
|
return call_llm(messages)
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_document(text: str, max_length: int = MAX_DIRECT_SUMMARY_LENGTH) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Main summarization function.
|
||||||
|
|
||||||
|
- If text is short, summarize directly
|
||||||
|
- If text is long, chunk and summarize each chunk, then synthesize
|
||||||
|
"""
|
||||||
|
original_length = len(text)
|
||||||
|
|
||||||
|
text = text.strip()
|
||||||
|
if not text:
|
||||||
|
raise ValueError("Empty text provided")
|
||||||
|
|
||||||
|
logger.info(f"Summarizing text of {original_length} characters")
|
||||||
|
|
||||||
|
# Direct summarization for shorter texts
|
||||||
|
if len(text) <= MAX_DIRECT_TEXT_LENGTH:
|
||||||
|
system_prompt = f"""You are a precise legal assistant creating concise, accurate summaries.
|
||||||
|
|
||||||
|
Create a summary that:
|
||||||
|
- Is approximately {max_length} words
|
||||||
|
- Captures key points and important details
|
||||||
|
- Uses clear, professional language
|
||||||
|
- Preserves names, dates, and specific facts
|
||||||
|
|
||||||
|
Format as plain text without bullet points."""
|
||||||
|
|
||||||
|
user_prompt = f"""Summarize the following document:
|
||||||
|
|
||||||
|
{text}
|
||||||
|
|
||||||
|
Summary:"""
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": system_prompt},
|
||||||
|
{"role": "user", "content": user_prompt}
|
||||||
|
]
|
||||||
|
|
||||||
|
summary = call_llm(messages)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"summary": summary,
|
||||||
|
"original_length": original_length,
|
||||||
|
"method": "direct",
|
||||||
|
"chunks": 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Chunked summarization for longer texts
|
||||||
|
chunks = chunk_text(text)
|
||||||
|
|
||||||
|
chunk_summaries = []
|
||||||
|
for i, chunk in enumerate(chunks, 1):
|
||||||
|
chunk_summary = summarize_chunk(chunk, i, len(chunks))
|
||||||
|
chunk_summaries.append(chunk_summary)
|
||||||
|
|
||||||
|
final_summary = synthesize_summaries(chunk_summaries)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"summary": final_summary,
|
"summary": final_summary,
|
||||||
@@ -244,9 +287,8 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
"""HTTP handler for MCP summary server."""
|
"""HTTP handler for MCP summary server."""
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
# Quiet logs by default
|
logger.info(format % args)
|
||||||
pass
|
|
||||||
|
|
||||||
def _send_json(self, status: int, payload: Any):
|
def _send_json(self, status: int, payload: Any):
|
||||||
"""Send JSON response."""
|
"""Send JSON response."""
|
||||||
body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
|
||||||
@@ -255,52 +297,57 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
self.send_header("Content-Length", str(len(body)))
|
self.send_header("Content-Length", str(len(body)))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(body)
|
self.wfile.write(body)
|
||||||
|
|
||||||
def _auth_or_401(self) -> bool:
|
def _auth_or_401(self):
|
||||||
"""Check authentication if API key is configured."""
|
"""Check authentication. Returns False if auth fails."""
|
||||||
try:
|
try:
|
||||||
return require_auth(self.headers)
|
return require_auth(self.headers)
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
self._send_json(401, {"error": "Missing or invalid API key"})
|
self._send_json(401, {"error": "Missing or invalid API key"})
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
"""Handle GET requests (health check)."""
|
"""Handle GET requests (health check)."""
|
||||||
if self.path == "/":
|
if self.path == "/":
|
||||||
self._send_json(200, {
|
self._send_json(200, {
|
||||||
"service": "mcp-summary",
|
"service": "mcp-summary",
|
||||||
"transport": "streamable-http",
|
"transport": "streamable-http",
|
||||||
|
"model": MODEL_NAME,
|
||||||
|
"status": "running",
|
||||||
"docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
|
"docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
self.send_error(404, "Not Found")
|
self.send_error(404, "Not Found")
|
||||||
|
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
"""Handle MCP JSON-RPC requests."""
|
"""Handle MCP JSON-RPC requests."""
|
||||||
|
# Streamable HTTP MCP endpoint
|
||||||
if self.path not in ("/", "/mcp"):
|
if self.path not in ("/", "/mcp"):
|
||||||
self.send_error(404, "Not Found")
|
self.send_error(404, "Not Found")
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self._auth_or_401():
|
if not self._auth_or_401():
|
||||||
return
|
return
|
||||||
|
|
||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
if length == 0:
|
if length == 0:
|
||||||
self._send_json(400, {"error": "Empty body"})
|
self._send_json(400, {"error": "Empty body"})
|
||||||
return
|
return
|
||||||
|
|
||||||
raw = self.rfile.read(length)
|
raw = self.rfile.read(length)
|
||||||
try:
|
try:
|
||||||
req = json.loads(raw)
|
req = json.loads(raw)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
self._send_json(400, {"error": "Invalid JSON"})
|
self._send_json(400, {"error": "Invalid JSON"})
|
||||||
return
|
return
|
||||||
|
|
||||||
method = req.get("method")
|
method = req.get("method")
|
||||||
params = req.get("params") or {}
|
params = req.get("params") or {}
|
||||||
req_id = req.get("id")
|
req_id = req.get("id")
|
||||||
|
|
||||||
|
logger.info(f"MCP request: method={method}, id={req_id}")
|
||||||
|
|
||||||
# MCP: initialize
|
# MCP: initialize
|
||||||
if method == "initialize":
|
if method == "initialize":
|
||||||
self._send_json(200, {
|
self._send_json(200, {
|
||||||
@@ -318,7 +365,16 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# MCP: ping
|
||||||
|
if method == "ping":
|
||||||
|
self._send_json(200, {
|
||||||
|
"jsonrpc": "2.0",
|
||||||
|
"id": req_id,
|
||||||
|
"result": {}
|
||||||
|
})
|
||||||
|
return
|
||||||
|
|
||||||
# MCP: tools/list
|
# MCP: tools/list
|
||||||
if method == "tools/list":
|
if method == "tools/list":
|
||||||
self._send_json(200, {
|
self._send_json(200, {
|
||||||
@@ -327,7 +383,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
"result": TOOLS_LIST
|
"result": TOOLS_LIST
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
# MCP: tools/call
|
# MCP: tools/call
|
||||||
if method == "tools/call":
|
if method == "tools/call":
|
||||||
tool_name = params.get("name")
|
tool_name = params.get("name")
|
||||||
@@ -344,6 +400,7 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.error(f"Tool call failed: {e}", exc_info=True)
|
||||||
self._send_json(200, {
|
self._send_json(200, {
|
||||||
"jsonrpc": "2.0",
|
"jsonrpc": "2.0",
|
||||||
"id": req_id,
|
"id": req_id,
|
||||||
@@ -353,10 +410,10 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
return
|
return
|
||||||
|
|
||||||
# Unknown method
|
# Unknown method
|
||||||
self._send_json(400, {"error": "Unknown method: " + str(method)})
|
self._send_json(400, {"error": "Unknown method: " + str(method)})
|
||||||
|
|
||||||
def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
|
def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
|
||||||
"""Execute a tool call."""
|
"""Execute a tool call."""
|
||||||
if name == "summarize_document":
|
if name == "summarize_document":
|
||||||
@@ -364,9 +421,9 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
|
|||||||
if not text:
|
if not text:
|
||||||
raise ValueError("Text parameter is required")
|
raise ValueError("Text parameter is required")
|
||||||
|
|
||||||
max_length = args.get("max_length", 100)
|
max_length = args.get("max_length", MAX_DIRECT_SUMMARY_LENGTH)
|
||||||
return summarize_document(text, max_length)
|
return summarize_document(text, max_length)
|
||||||
|
|
||||||
raise ValueError(f"Unknown tool: {name}")
|
raise ValueError(f"Unknown tool: {name}")
|
||||||
|
|
||||||
|
|
||||||
@@ -376,6 +433,11 @@ def main():
|
|||||||
server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler)
|
server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler)
|
||||||
mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
|
mode = "auth enabled (Bearer)" if API_KEY else "no auth (API_KEY not set)"
|
||||||
print(f"MCP Summary Server listening on 0.0.0.0:{port} [{mode}]")
|
print(f"MCP Summary Server listening on 0.0.0.0:{port} [{mode}]")
|
||||||
|
print(f" - Model: {MODEL_NAME}")
|
||||||
|
print(f" - LLM URL: {OPENAPI_URL}")
|
||||||
|
print(f" - Chunk size: {CHUNK_SIZE} characters")
|
||||||
|
print(f" - Max direct text: {MAX_DIRECT_TEXT_LENGTH} characters")
|
||||||
|
print(f" - LLM timeout: {LLM_TIMEOUT} seconds")
|
||||||
try:
|
try:
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|||||||
@@ -1,4 +0,0 @@
|
|||||||
# requirements.txt for MCP Summary Server
|
|
||||||
|
|
||||||
# HTTP requests for LLM communication
|
|
||||||
requests>=2.31.0
|
|
||||||
Reference in New Issue
Block a user