Compare commits

..

9 Commits

7 changed files with 834 additions and 326 deletions
+32
View File
@@ -0,0 +1,32 @@
# MCP Summary Server - Environment Variables
# Server Configuration
PORT=8080
# Authentication (optional)
# If set, requests must include: Authorization: Bearer <API_KEY>
API_KEY=
# LLM Configuration
OPENAPI_URL=http://localhost:8080/v1
OPENAPI_API_KEY=
MODEL_NAME=gpt-4o
# LLM Call Timeout in seconds (increase for large documents)
LLM_TIMEOUT=120
# Summarization Configuration
# Characters per chunk when splitting long text
CHUNK_SIZE=4000
# Characters of overlap between chunks to maintain context
OVERLAP=200
# Target length for intermediate chunk summaries (words)
TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
# Maximum length for final synthesized summary (words)
MAX_DIRECT_SUMMARY_LENGTH=100
# Maximum text length (characters) before chunking is triggered
MAX_DIRECT_TEXT_LENGTH=8000
+37
View File
@@ -0,0 +1,37 @@
# Dockerfile for MCP Summary Server
#
# Usage (from directory containing this Dockerfile, requirements.txt and
# mcp_summary_server.py — both files are COPY'd into the image below):
#
# docker build -t mcp-summary .
# docker run -p 8080:8080 --env-file .env mcp-summary
#
FROM python:3.12-slim
WORKDIR /app
# Install runtime dependencies
# (requirements.txt is only needed for the install, so it is removed in the same layer)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt
# Copy the server script
COPY mcp_summary_server.py /app/mcp_summary_server.py
# Expose HTTP port
# (matches the PORT default below; publish it with `docker run -p`)
EXPOSE 8080
# Environment variables
# These are defaults only; override them at run time with --env-file or -e.
ENV PORT=8080
# NOTE(review): this default points at localhost:8080, the same port this server
# listens on (PORT above). Inside a container "localhost" is the container itself,
# so this presumably must be overridden to reach the real LLM endpoint — confirm.
ENV OPENAPI_URL=http://localhost:8080/v1
ENV OPENAPI_API_KEY=
ENV MODEL_NAME=gpt-4o
ENV CHUNK_SIZE=4000
ENV OVERLAP=200
ENV TARGET_INTERMEDIATE_SUMMARY_LENGTH=150
ENV MAX_DIRECT_SUMMARY_LENGTH=100
ENV MAX_DIRECT_TEXT_LENGTH=8000
ENV LLM_TIMEOUT=120
# Left empty by default; when set, clients must send "Authorization: Bearer <API_KEY>".
ENV API_KEY=
# Start the MCP summary server
# (-u keeps Python's stdout/stderr unbuffered so log lines show up in `docker logs` immediately)
ENTRYPOINT ["python", "-u", "/app/mcp_summary_server.py"]
+137
View File
@@ -0,0 +1,137 @@
# MCP Summary Server
An MCP (Model Context Protocol) server for document summarization that keeps full text out of the chat context window.
## Features
- Automatically determines whether to summarize directly or use chunked summarization
- All processing happens server-side
- Returns only the summary to the client
- Configurable chunking parameters
- Bearer token authentication (optional)
## Setup
### Environment Variables
Copy `.env.example` to `.env` and configure:
```bash
cp .env.example .env
```
| Variable | Default | Description |
|----------|---------|-------------|
| PORT | 8080 | HTTP server port |
| API_KEY | (empty) | Bearer token for authentication |
| OPENAPI_URL | http://localhost:8080/v1 | LLM API endpoint |
| OPENAPI_API_KEY | (empty) | LLM API key |
| MODEL_NAME | gpt-4o | LLM model to use |
| LLM_TIMEOUT | 120 | LLM call timeout in seconds |
| CHUNK_SIZE | 4000 | Characters per chunk |
| OVERLAP | 200 | Characters of overlap between chunks |
| TARGET_INTERMEDIATE_SUMMARY_LENGTH | 150 | Words per chunk summary |
| MAX_DIRECT_SUMMARY_LENGTH | 100 | Max final summary length (words) |
| MAX_DIRECT_TEXT_LENGTH | 8000 | Max text length (characters) before chunking is triggered |
## Running
### Docker
```bash
# Build
docker build -t mcp-summary .
# Run with environment file
docker run -p 8080:8080 --env-file .env mcp-summary
# Run with inline environment variables
docker run -p 8080:8080 \
-e OPENAPI_URL=http://localhost:8080/v1 \
-e OPENAPI_API_KEY=your-key \
-e MODEL_NAME=gpt-4o \
mcp-summary
```
### Python
```bash
pip install -r requirements.txt
python mcp_summary_server.py
```
## Connecting to OpenWebUI
### In OpenWebUI Admin Settings
1. Go to **Admin Settings → External Tools**
2. Click **+ (Add Server)**
3. Set **Type** to **MCP (Streamable HTTP)**
4. Enter your **Server URL**
5. Set **Authentication**:
- **None** if no API key is configured
- **Bearer** if API_KEY is set (provide the key)
6. Save
### Docker Networking
If running both OpenWebUI and MCP Summary in Docker:
```bash
# Use host.docker.internal to reach host machine
docker run -p 8080:8080 \
-e OPENAPI_URL=http://host.docker.internal:3000/v1 \
-e OPENAPI_API_KEY=your-key \
mcp-summary
```
If both containers are on the same Docker network, use the container name directly:
```bash
docker run --network mynetwork -p 8080:8080 \
-e OPENAPI_URL=http://openwebui-container:8080/v1 \
-e OPENAPI_API_KEY=your-key \
mcp-summary
```
## MCP Tool
### summarize_document
Summarizes a document, automatically handling chunking for long text.
**Parameters:**
- `text` (string, required): The document text to summarize
- `max_length` (integer, optional): Maximum summary length in words (default: 100)
**Returns:**
```json
{
"summary": "The summarized text...",
"original_length": 12345,
"method": "direct", // or "chunked"
"chunks": 1 // number of chunks used
}
```
## Troubleshooting
### "Failed to connect to MCP server"
1. **Check authentication**: Ensure you haven't selected `Bearer` without a key. Switch to `None` if no token is needed.
2. **Check network connectivity**: Ensure OpenWebUI can reach the MCP server URL
3. **Check LLM connectivity**: Ensure the MCP server can reach the LLM at OPENAPI_URL
4. **Check timeouts**: Increase LLM_TIMEOUT if summarization takes too long
### Infinite loading screen
This may occur if you configured the server as OpenAPI instead of MCP. Fix by:
1. Opening Admin Settings → External Tools
2. Disabling/deleting the problematic connection
3. Re-adding with **Type** set to **MCP (Streamable HTTP)**
### Slow initialization
If the server takes longer than 10 seconds to initialize:
- Increase `MCP_INITIALIZE_TIMEOUT` in OpenWebUI (default: 10 seconds)
Binary file not shown.
+34
View File
@@ -0,0 +1,34 @@
#!/bin/bash
# Diagnostic script for MCP Summary Server
#
# Runs five checks against a server on localhost: process, listening port,
# HTTP reachability, MCP "initialize" and MCP "tools/list".
#
# Optional environment variables:
#   PORT     Port to probe (default: 8080).
#   API_KEY  Bearer token; when set it is sent as "Authorization: Bearer <API_KEY>".

PORT="${PORT:-8080}"
BASE_URL="http://localhost:${PORT}/"

# Only send an Authorization header when a key has been provided.
AUTH_ARGS=()
if [ -n "${API_KEY:-}" ]; then
    AUTH_ARGS=(-H "Authorization: Bearer ${API_KEY}")
fi

# Pretty-print JSON via jq when available; otherwise show the raw response so a
# missing jq does not hide the server's answer.
pretty_json() {
    if command -v jq >/dev/null 2>&1; then
        jq .
    else
        cat
        echo
    fi
}

# POST one JSON-RPC payload ($1) to the MCP endpoint and print the response.
mcp_post() {
    curl -s --max-time 10 -X POST "$BASE_URL" \
        -H "Content-Type: application/json" \
        "${AUTH_ARGS[@]}" \
        -d "$1" | pretty_json
}

echo "================================"
echo "MCP Summary Server Diagnostics"
echo "================================"

# Check if server is running.
# The [m] bracket keeps grep from matching its own command line, which would
# otherwise make this check succeed even when the server is down.
echo -e "\n1. Checking if server process is running..."
ps aux | grep '[m]cp_summary_server' || echo "Server process not found"

# Check if port is listening (fall back to ss when netstat is not installed).
# The pattern anchors on ":PORT" followed by whitespace so e.g. 18080 does not match 8080.
echo -e "\n2. Checking if port is listening..."
{ netstat -tlnp 2>/dev/null || ss -tlnp 2>/dev/null; } \
    | grep -E "[:.]${PORT}[[:space:]]" || echo "Port ${PORT} not listening"

# Test basic connectivity
echo -e "\n3. Testing basic connectivity..."
curl -s --max-time 10 "$BASE_URL" || echo "Cannot connect to localhost:${PORT}"

# Test MCP initialize
echo -e "\n4. Testing MCP initialize..."
mcp_post '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-11-25","capabilities":{},"clientInfo":{"name":"test","version":"1.0.0"}}}'

# Test tools list
echo -e "\n5. Testing tools list..."
mcp_post '{"jsonrpc":"2.0","id":2,"method":"tools/list","params":{}}'

echo -e "\n================================"
echo "Diagnostics complete"
echo "================================"
+508 -244
View File
@@ -4,15 +4,25 @@ MCP Summary Server (Streamable HTTP transport)
Designed to work with OpenWebUI's MCP (Streamable HTTP) integration. Designed to work with OpenWebUI's MCP (Streamable HTTP) integration.
Summarizes documents by: Features:
1. Checking text length - Multiple specialized summarization, comparison, and extraction tools.
2. If short, summarizing directly with LLM - Automatic chunking and synthesis for long documents.
3. If long, chunking text, summarizing each chunk, then synthesizing - Temporary in-memory storage of document chunks/summaries for continued use.
- Configurable cache limits via environment variables.
All processing happens server-side, keeping full text out of the chat context window.
Tools: Tools:
- summarize_document: Summarize a document (handles chunking automatically) - summarize_document
- summarize_executive_brief
- summarize_bullet_points
- summarize_for_court
- compare_documents
- extract_key_points
- extract_action_items
- extract_entities
- summarize_very_long_document
- retrieve_document_data
- query_stored_document
- clear_document_cache
Auth: Auth:
- If API_KEY is set: - If API_KEY is set:
@@ -24,14 +34,19 @@ Auth:
import json import json
import os import os
import sys import sys
import time
import uuid
import logging import logging
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional, Tuple
import requests import requests
from requests.exceptions import RequestException
# Configure logging # Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
stream=sys.stdout,
)
logger = logging.getLogger("mcp-summary") logger = logging.getLogger("mcp-summary")
# MCP Server Configuration # MCP Server Configuration
@@ -42,255 +57,423 @@ PORT = int(os.environ.get("PORT", "8080"))
OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1") OPENAPI_URL = os.environ.get("OPENAPI_URL", "http://localhost:8080/v1")
OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "") OPENAPI_API_KEY = os.environ.get("OPENAPI_API_KEY", "")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o") MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
# Summarization Configuration
CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
OVERLAP = int(os.environ.get("OVERLAP", "200"))
TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
MAX_DIRECT_SUMMARY_LENGTH = int(os.environ.get("MAX_DIRECT_SUMMARY_LENGTH", "100"))
MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120")) LLM_TIMEOUT = int(os.environ.get("LLM_TIMEOUT", "120"))
# Tool definitions # Chunking Configuration
TOOLS_LIST: Dict[str, Any] = { CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "4000"))
"tools": [ OVERLAP = int(os.environ.get("OVERLAP", "200"))
{ MAX_DIRECT_TEXT_LENGTH = int(os.environ.get("MAX_DIRECT_TEXT_LENGTH", "8000"))
"name": "summarize_document", TARGET_INTERMEDIATE_SUMMARY_LENGTH = int(os.environ.get("TARGET_INTERMEDIATE_SUMMARY_LENGTH", "150"))
"description": "Summarize a document. Automatically handles chunking for long text. Returns a concise summary without exposing the full text.",
"inputSchema": { # Cache Configuration
"type": "object", MAX_STORED_DOCS = int(os.environ.get("MAX_STORED_DOCS", "500"))
"properties": { CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", "86400")) # 24h default
"text": {
"type": "string", # Temporary in-memory store
"description": "The document text to summarize" DOCUMENT_STORE: Dict[str, Dict[str, Any]] = {}
},
"max_length": {
def generate_doc_id() -> str:
    """Return a fresh random UUID (version 4) in string form, used as a document key."""
    new_id = uuid.uuid4()
    return str(new_id)
}
},
def evict_oldest_if_needed():
    """Drop the oldest entries from DOCUMENT_STORE until it holds at most MAX_STORED_DOCS.

    Age is taken from each entry's "created_at" timestamp. Does nothing when the
    store is already within the limit.
    """
    excess = len(DOCUMENT_STORE) - MAX_STORED_DOCS
    if excess <= 0:
        return
    # Oldest first, so the first `excess` ids are the ones to discard.
    ids_by_age = sorted(
        DOCUMENT_STORE.keys(),
        key=lambda doc_id: DOCUMENT_STORE[doc_id]["created_at"],
    )
    for stale_id in ids_by_age[:excess]:
        DOCUMENT_STORE.pop(stale_id, None)
def store_document(doc_id: str, text_length: int, chunks: List[str],
                   intermediate_summaries: List[str], final_output: str,
                   tool_used: str):
    """Cache the results of one processed document in DOCUMENT_STORE.

    Args:
        doc_id: Key under which the entry is stored (an existing entry is replaced).
        text_length: Length of the original text, as supplied by the caller.
        chunks: The text chunks that were processed.
        intermediate_summaries: Per-chunk outputs.
        final_output: The final result returned to the client.
        tool_used: Name of the tool that produced the result.
    """
    DOCUMENT_STORE[doc_id] = {
        "text_length": text_length,
        "chunks_count": len(chunks),
        "chunks": chunks,
        "intermediate_summaries": intermediate_summaries,
        "final_output": final_output,
        "tool_used": tool_used,
        "created_at": time.time()
    }
    # Evict AFTER inserting. Evicting first only trims down to the limit and the
    # insert then pushes the store one entry over MAX_STORED_DOCS.
    evict_oldest_if_needed()
def get_document(doc_id: str) -> Optional[Dict[str, Any]]:
    """Look up a cached document entry by id.

    Returns the stored entry, or None when the id is unknown or the entry is
    older than CACHE_TTL_SECONDS (an expired entry is removed from the store).
    """
    entry = DOCUMENT_STORE.get(doc_id)
    if entry:
        age_seconds = time.time() - entry["created_at"]
        if age_seconds <= CACHE_TTL_SECONDS:
            return entry
        # Expired: forget it so it no longer occupies a cache slot.
        DOCUMENT_STORE.pop(doc_id, None)
    return None
def require_auth(headers: Any) -> bool: def call_llm(system_prompt: str, user_prompt: str, max_tokens: int = 2000) -> str:
"""Check authentication. Returns True if auth passes or is not required."""
if not API_KEY:
return True
token = get_bearer_token(headers)
if not token or token != API_KEY:
raise PermissionError("Missing or invalid API key")
return True
def call_llm(messages: List[Dict], temperature: float = 0.3) -> str:
"""Make an OpenAPI-compatible LLM call with error handling."""
url = f"{OPENAPI_URL}/chat/completions" url = f"{OPENAPI_URL}/chat/completions"
headers = { headers = {
"Content-Type": "application/json", "Content-Type": "application/json",
"Authorization": f"Bearer {OPENAPI_API_KEY}" "Authorization": f"Bearer {OPENAPI_API_KEY}"
} }
payload = { payload = {
"model": MODEL_NAME, "model": MODEL_NAME,
"messages": messages, "messages": [
"temperature": temperature, {"role": "system", "content": system_prompt},
"max_tokens": 2000, {"role": "user", "content": user_prompt}
],
"temperature": 0.3,
"max_tokens": max_tokens,
"top_p": 0.9 "top_p": 0.9
} }
logger.info(f"Calling LLM: {OPENAPI_URL} model={MODEL_NAME}")
try:
logger.info(f"Calling LLM at {OPENAPI_URL} with model {MODEL_NAME}")
response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT) response = requests.post(url, headers=headers, json=payload, timeout=LLM_TIMEOUT)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
return data["choices"][0]["message"]["content"] return data["choices"][0]["message"]["content"]
except RequestException as e:
logger.error(f"LLM request failed: {e}")
raise RuntimeError(f"Failed to connect to LLM at {OPENAPI_URL}: {str(e)}")
except Exception as e:
logger.error(f"LLM call failed: {e}")
raise RuntimeError(f"LLM call failed: {str(e)}")
def chunk_text(text: str, chunk_size: Optional[int] = None,
               overlap: Optional[int] = None) -> List[str]:
    """Split text into overlapping chunks, preferring natural break points.

    Each chunk is at most ``chunk_size`` characters. Within the second half of
    every window the function looks for the last paragraph break, line break or
    sentence end and cuts there; if none is found it cuts at the window end.
    The next chunk starts ``overlap`` characters before the cut.

    Args:
        text: The text to split.
        chunk_size: Maximum characters per chunk. Defaults to CHUNK_SIZE.
        overlap: Characters shared between consecutive chunks. Defaults to
            OVERLAP. Negative values are treated as 0.

    Returns:
        The list of chunks; ``[text]`` when the text fits in a single chunk.
        Whitespace-only chunks are dropped.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if overlap is None:
        overlap = OVERLAP
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    overlap = max(overlap, 0)

    text_len = len(text)
    if text_len <= chunk_size:
        return [text]

    chunks = []
    start = 0
    while start < text_len:
        end = min(start + chunk_size, text_len)
        break_point = end
        # Markers are ordered from strongest to weakest boundary; only the second
        # half of the window is searched so chunks never get too short.
        for marker in ("\n\n", "\n", ". ", "! ", "? "):
            pos = text.rfind(marker, start + chunk_size // 2, end)
            if pos > start:
                break_point = pos
                break
        chunk = text[start:break_point]
        if chunk.strip():
            chunks.append(chunk)
        if break_point >= text_len:
            break
        # Always move forward by at least one character: with a large overlap the
        # plain `break_point - overlap` could stall (infinite loop) or go negative.
        start = max(break_point - overlap, start + 1)
    return chunks
_BASE_SYSTEM_PROMPT = "You are a precise legal assistant creating concise, accurate outputs."

# tool name -> (instructions appended to the base system prompt,
#               per-chunk user template, synthesis user template)
_TOOL_PROMPTS: Dict[str, Tuple[str, str, str]] = {
    "summarize_document": (
        "\nCreate a clear, professional summary.\n"
        "- Approximately {max_length} words.\n"
        "- Capture key points, important details, names, dates, facts.\n"
        "- Format as plain text without bullet points.\n",
        "Summarize this text (chunk {i} of {total}):\n\n{text}\n\nSummary:",
        "Synthesize these partial summaries into one cohesive summary:\n\n{summaries}\n\nFinal summary:",
    ),
    "summarize_executive_brief": (
        "\nCreate an executive brief:\n"
        "- 1-2 paragraphs.\n"
        "- High-level overview of issues, key findings, and outcomes.\n"
        "- Professional tone, suitable for senior decision-makers.\n"
        "- No bullet points.\n",
        "Provide a concise executive-style summary of this chunk (chunk {i} of {total}):\n\n{text}\n\nExecutive summary:",
        "Combine these executive-style summaries into a single, clear executive brief:\n\n{summaries}\n\nFinal executive brief:",
    ),
    "summarize_bullet_points": (
        "\nCreate a concise bullet-point summary:\n"
        "- Use short bullets.\n"
        "- Focus on key points, actions, dates, and outcomes.\n"
        "- No long paragraphs.\n",
        "Summarize this chunk as concise bullet points (chunk {i} of {total}):\n\n{text}\n\nBullet points:",
        "Merge these bullet-point summaries into one clean, non-redundant bullet list:\n\n{summaries}\n\nFinal bullet summary:",
    ),
    "summarize_for_court": (
        "\nCreate a summary suitable for a judge or legal professional:\n"
        "- Clearly state: parties, issues, key evidence, legal reasoning, outcome.\n"
        "- Use formal, precise language.\n"
        "- Keep it concise and structured.\n",
        "Provide a court-style summary of this chunk (chunk {i} of {total}):\n\n{text}\n\nCourt summary:",
        "Combine these summaries into a single, structured summary suitable for a court:\n\n{summaries}\n\nFinal court-style summary:",
    ),
    # For compare, both texts are processed together; chunking applies if the
    # combined text is long.
    "compare_documents": (
        "\nCompare two documents and highlight:\n"
        "- Key differences and conflicts.\n"
        "- Changes in facts, reasoning, or outcomes.\n"
        "- Any new or removed conditions/requirements.\n"
        "Be precise and concise.\n",
        "Compare these excerpts and note key differences/conflicts (chunk {i} of {total}):\n\n{text}\n\nComparison:",
        "Synthesize these partial comparisons into a single, clear comparison summary:\n\n{summaries}\n\nFinal comparison:",
    ),
    "extract_key_points": (
        "\nExtract the key points from the text:\n"
        "- Issues, holdings, obligations, dates, parties, statutes.\n"
        "- Use concise bullet points.\n"
        "- Do not add commentary.\n",
        "Extract the key points from this chunk (chunk {i} of {total}):\n\n{text}\n\nKey points:",
        "Combine these extracted key points into one clean, non-redundant list:\n\n{summaries}\n\nFinal key points:",
    ),
    "extract_action_items": (
        "\nExtract all action items, deadlines, and obligations:\n"
        "- Who must do what, by when.\n"
        "- Use concise bullets.\n"
        "- No extra commentary.\n",
        "Extract action items from this chunk (chunk {i} of {total}):\n\n{text}\n\nAction items:",
        "Combine these action items into one clear, non-redundant list:\n\n{summaries}\n\nFinal action items:",
    ),
    "extract_entities": (
        "\nExtract important entities:\n"
        "- People, organizations, locations, dates, legal references, case names.\n"
        "- Use concise bullets, grouped by type.\n"
        "- No extra commentary.\n",
        "Extract entities from this chunk (chunk {i} of {total}):\n\n{text}\n\nEntities:",
        "Merge these entity lists into one clean, grouped list:\n\n{summaries}\n\nFinal entities:",
    ),
    "summarize_very_long_document": (
        "\nCreate a concise, structured summary optimized for very long documents:\n"
        "- Preserve core issues, reasoning, outcomes, and critical details.\n"
        "- Use clear paragraphs; avoid fluff.\n",
        "Summarize this chunk from a very long document (chunk {i} of {total}):\n\n{text}\n\nSummary:",
        "Synthesize these summaries into one concise, structured summary of the full document:\n\n{summaries}\n\nFinal summary:",
    ),
}

# Used for any tool name that has no dedicated entry above.
_FALLBACK_CHUNK_TEMPLATE = "Process this chunk (chunk {i} of {total}):\n\n{text}"
_FALLBACK_SYNTH_TEMPLATE = "Combine these results:\n\n{summaries}"


def build_tool_prompts(tool_name: str) -> Tuple[str, str, str]:
    """Return the prompt set for a tool.

    Args:
        tool_name: Name of the MCP tool being executed.

    Returns:
        A tuple ``(system_prompt, chunk_user_template, synthesis_user_template)``.
        The chunk template uses ``{i}``, ``{total}`` and ``{text}`` placeholders;
        the synthesis template uses ``{summaries}``. The system prompt for
        ``summarize_document`` additionally contains a literal ``{max_length}``
        placeholder, which this function leaves unsubstituted.
        Unknown tool names get the bare base system prompt and generic templates.
    """
    entry = _TOOL_PROMPTS.get(tool_name)
    if entry is None:
        return _BASE_SYSTEM_PROMPT, _FALLBACK_CHUNK_TEMPLATE, _FALLBACK_SYNTH_TEMPLATE
    instructions, chunk_user, synth_user = entry
    return _BASE_SYSTEM_PROMPT + instructions, chunk_user, synth_user
def process_with_chunking(
    text: str,
    tool_name: str,
    max_length: int = 100
) -> Tuple[str, List[str], List[str]]:
    """Run a tool over a text, chunking and synthesizing when the text is long.

    Texts of at most MAX_DIRECT_TEXT_LENGTH characters are sent to the LLM in a
    single call. Longer texts are split with chunk_text(), each chunk is
    processed separately, and the per-chunk outputs are combined in a final
    synthesis call.

    Args:
        text: The document text; surrounding whitespace is stripped.
        tool_name: Tool whose prompts (from build_tool_prompts) are used.
        max_length: Target length in words for the final output. It is
            substituted for the ``{max_length}`` placeholder in the system
            prompt when the tool's prompt has one.

    Returns:
        (final_output, chunks, intermediate_summaries)

    Raises:
        ValueError: If the text is empty after stripping.
    """
    text = text.strip()
    if not text:
        raise ValueError("Empty text provided")
    if max_length is None:
        max_length = 100

    sys_template, chunk_user_tpl, synth_user_tpl = build_tool_prompts(tool_name)
    # The length target lives in the SYSTEM prompt, so it must be substituted
    # there. str.replace is used instead of str.format so prompts without the
    # placeholder are left untouched.
    final_sys_prompt = sys_template.replace("{max_length}", str(max_length))

    # If short, direct processing
    if len(text) <= MAX_DIRECT_TEXT_LENGTH:
        user_prompt = chunk_user_tpl.format(i=1, total=1, text=text)
        final_output = call_llm(final_sys_prompt, user_prompt)
        return final_output, [text], [final_output]

    # Chunked processing: per-chunk outputs aim for the configured intermediate
    # length rather than the caller's final length.
    chunk_sys_prompt = sys_template.replace(
        "{max_length}", str(TARGET_INTERMEDIATE_SUMMARY_LENGTH)
    )
    chunks = chunk_text(text)
    total = len(chunks)
    intermediate_summaries = [
        call_llm(chunk_sys_prompt, chunk_user_tpl.format(i=i, total=total, text=chunk))
        for i, chunk in enumerate(chunks, 1)
    ]

    # Synthesis
    combined = "\n\n".join(intermediate_summaries)
    synth_prompt = synth_user_tpl.format(summaries=combined)
    final_output = call_llm(final_sys_prompt, synth_prompt)

    return final_output, chunks, intermediate_summaries
"summary": final_summary,
"original_length": original_length,
def compare_texts_with_chunking(text1: str, text2: str) -> Tuple[str, List[str], List[str]]:
    """Compare two documents by labelling them, joining them into one text and
    running it through process_with_chunking with the "compare_documents" tool.

    Returns whatever process_with_chunking returns:
    (final_output, chunks, intermediate_summaries).
    """
    labelled_pair = "=== DOCUMENT 1 ===\n\n{}\n\n=== DOCUMENT 2 ===\n\n{}".format(text1, text2)
    return process_with_chunking(labelled_pair, "compare_documents")
def query_chunks(chunks: List[str], question: str) -> str:
    """Answer a question from stored document chunks with a single LLM call.

    All chunks are joined (blank line between them) and sent together with the
    question; no chunk selection or sampling is performed.

    Args:
        chunks: Text chunks of a previously processed document.
        question: The question to answer from those chunks.

    Returns:
        The LLM's answer text.
    """
    system_prompt = (
        "You are a precise legal assistant. Answer the question strictly based on the provided text. "
        "If the information is not present, say so clearly."
    )
    document_text = "\n\n".join(chunks)
    user_prompt = f"Question:\n{question}\n\nText:\n{document_text}"
    return call_llm(system_prompt, user_prompt, max_tokens=1500)
# Tool definitions
def _single_text_tool(name: str, description: str,
                      text_description: str = "Full document text.") -> Dict[str, Any]:
    """Build the tool entry for a tool whose only required argument is ``text``."""
    return {
        "name": name,
        "description": description,
        "inputSchema": {
            "type": "object",
            "properties": {
                "text": {"type": "string", "description": text_description}
            },
            "required": ["text"]
        }
    }


def _summarize_document_tool() -> Dict[str, Any]:
    """Build the summarize_document entry, which also accepts an optional max_length."""
    tool = _single_text_tool(
        "summarize_document",
        "General-purpose document summarization. Prefer this for long or complex documents to avoid context limits.",
        "Full document text to summarize.",
    )
    tool["inputSchema"]["properties"]["max_length"] = {
        "type": "integer",
        "description": "Max summary length in words (default: 100)."
    }
    return tool


# Tool catalogue returned verbatim as the result of the MCP "tools/list" method.
TOOLS_LIST: Dict[str, Any] = {
    "tools": [
        _summarize_document_tool(),
        _single_text_tool(
            "summarize_executive_brief",
            "Create a short executive brief (1-2 paragraphs) for senior decision-makers.",
        ),
        _single_text_tool(
            "summarize_bullet_points",
            "Create a concise bullet-point summary of key points.",
        ),
        _single_text_tool(
            "summarize_for_court",
            "Create a formal summary suitable for a judge or legal professional.",
        ),
        {
            "name": "compare_documents",
            "description": "Compare two documents and highlight key differences, conflicts, and changes.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "text1": {"type": "string", "description": "First document text."},
                    "text2": {"type": "string", "description": "Second document text."}
                },
                "required": ["text1", "text2"]
            }
        },
        _single_text_tool(
            "extract_key_points",
            "Extract key points: issues, holdings, obligations, dates, parties, statutes.",
        ),
        _single_text_tool(
            "extract_action_items",
            "Extract all action items, deadlines, and obligations.",
        ),
        _single_text_tool(
            "extract_entities",
            "Extract important entities: people, organizations, locations, dates, legal references.",
        ),
        _single_text_tool(
            "summarize_very_long_document",
            "Optimized for very long documents with deeper chunking and hierarchical summarization.",
            "Very long document text.",
        ),
        {
            "name": "retrieve_document_data",
            "description": "Retrieve stored data for a previously processed document by doc_id (final output, intermediate summaries, metadata).",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "doc_id": {"type": "string", "description": "Document ID returned when the document was first processed."}
                },
                "required": ["doc_id"]
            }
        },
        {
            "name": "query_stored_document",
            "description": "Ask a question about a previously processed document using its stored chunks.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "doc_id": {"type": "string", "description": "Document ID."},
                    "question": {"type": "string", "description": "Your question about the document."}
                },
                "required": ["doc_id", "question"]
            }
        },
        {
            "name": "clear_document_cache",
            "description": "Clear all temporarily stored document data from this server.",
            "inputSchema": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    ]
}
class MCPSummaryHandler(BaseHTTPRequestHandler): class MCPSummaryHandler(BaseHTTPRequestHandler):
"""HTTP handler for MCP summary server."""
def log_message(self, format, *args):
    """Send the HTTP server's per-request log line to the module logger at INFO level."""
    rendered = format % args
    logger.info(rendered)
def _send_json(self, status: int, payload: Any): def _send_json(self, status: int, payload: Any):
"""Send JSON response."""
body = json.dumps(payload, ensure_ascii=False).encode("utf-8") body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
self.send_response(status) self.send_response(status)
self.send_header("Content-Type", "application/json") self.send_header("Content-Type", "application/json")
@@ -298,31 +481,36 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(body) self.wfile.write(body)
def _auth_or_401(self) -> bool:
    """Check the request's bearer token against API_KEY.

    Returns:
        True when no API_KEY is configured or the request carries
        ``Authorization: Bearer <API_KEY>``. Otherwise a 401 JSON response is
        sent and False is returned, so the caller should stop handling the
        request.
    """
    import hmac  # local import: only needed when authentication is enabled

    if not API_KEY:
        return True

    header = (self.headers.get("Authorization") or "").strip()
    scheme, _, credentials = header.partition(" ")
    # HTTP auth scheme names are case-insensitive, so accept "bearer" as well.
    if scheme.lower() == "bearer":
        token = credentials.strip()
        # Constant-time comparison so response timing does not leak how much of
        # the key matched; bytes are compared because compare_digest rejects
        # non-ASCII str values.
        if hmac.compare_digest(token.encode("utf-8"), API_KEY.encode("utf-8")):
            return True

    self._send_json(401, {"error": "Missing or invalid API key"})
    return False
def do_GET(self): def do_GET(self):
"""Handle GET requests (health check).""" try:
if self.path == "/": if self.path == "/":
self._send_json(200, { self._send_json(200, {
"service": "mcp-summary", "service": "mcp-summary",
"transport": "streamable-http", "transport": "streamable-http",
"model": MODEL_NAME,
"status": "running",
"docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)." "docs": "Use POST / with MCP JSON-RPC (initialize, tools/list, tools/call)."
}) })
return return
self.send_error(404, "Not Found") self.send_error(404, "Not Found")
except Exception as e:
logger.error(f"GET error: {e}", exc_info=True)
try:
self.send_error(500, "Internal Server Error")
except Exception:
pass
def do_POST(self): def do_POST(self):
"""Handle MCP JSON-RPC requests.""" try:
# Streamable HTTP MCP endpoint
if self.path not in ("/", "/mcp"): if self.path not in ("/", "/mcp"):
self.send_error(404, "Not Found") self.send_error(404, "Not Found")
return return
@@ -348,43 +536,35 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
logger.info(f"MCP request: method={method}, id={req_id}") logger.info(f"MCP request: method={method}, id={req_id}")
# MCP: initialize # Notifications
if isinstance(method, str) and method.startswith("notifications/"):
if req_id is not None:
self._send_json(200, {"jsonrpc": "2.0", "id": req_id, "result": {}})
else:
self.send_response(200)
self.send_header("Content-Length", "0")
self.end_headers()
return
# initialize
if method == "initialize": if method == "initialize":
self._send_json(200, { self._send_json(200, {
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": req_id, "id": req_id,
"result": { "result": {
"protocolVersion": "2025-11-25", "protocolVersion": "2025-11-25",
"capabilities": { "capabilities": {"tools": {}},
"tools": {} "serverInfo": {"name": "mcp-summary", "version": "1.0.0"}
},
"serverInfo": {
"name": "mcp-summary",
"version": "1.0.0"
}
} }
}) })
return return
# MCP: ping # tools/list
if method == "ping":
self._send_json(200, {
"jsonrpc": "2.0",
"id": req_id,
"result": {}
})
return
# MCP: tools/list
if method == "tools/list": if method == "tools/list":
self._send_json(200, { self._send_json(200, {"jsonrpc": "2.0", "id": req_id, "result": TOOLS_LIST})
"jsonrpc": "2.0",
"id": req_id,
"result": TOOLS_LIST
})
return return
# MCP: tools/call # tools/call
if method == "tools/call": if method == "tools/call":
tool_name = params.get("name") tool_name = params.get("name")
tool_args = params.get("arguments") or {} tool_args = params.get("arguments") or {}
@@ -400,48 +580,132 @@ class MCPSummaryHandler(BaseHTTPRequestHandler):
} }
}) })
except Exception as e: except Exception as e:
logger.error(f"Tool call failed: {e}", exc_info=True) logger.error(f"Tool call error: {e}", exc_info=True)
self._send_json(200, { self._send_json(200, {
"jsonrpc": "2.0", "jsonrpc": "2.0",
"id": req_id, "id": req_id,
"error": { "error": {"code": -32000, "message": str(e)}
"code": -32000,
"message": str(e)
}
}) })
return return
# Unknown method
self._send_json(400, {"error": "Unknown method: " + str(method)}) self._send_json(400, {"error": "Unknown method: " + str(method)})
except Exception as e:
logger.error(f"POST error: {e}", exc_info=True)
try:
self.send_error(500, "Internal Server Error")
except Exception:
pass
def _call_tool(self, name: str, args: Dict[str, Any]) -> Any: def _call_tool(self, name: str, args: Dict[str, Any]) -> Any:
"""Execute a tool call.""" # General single-text tools
if name == "summarize_document": if name in (
"summarize_document",
"summarize_executive_brief",
"summarize_bullet_points",
"summarize_for_court",
"extract_key_points",
"extract_action_items",
"extract_entities",
"summarize_very_long_document"
):
text = args.get("text") text = args.get("text")
if not text: if not text:
raise ValueError("Text parameter is required") raise ValueError("Text parameter is required")
max_length = args.get("max_length", 100)
final_output, chunks, intermediate_summaries = process_with_chunking(
text, name, max_length
)
doc_id = generate_doc_id()
store_document(doc_id, len(text), chunks, intermediate_summaries, final_output, name)
return {
"doc_id": doc_id,
"tool": name,
"result": final_output,
"metadata": {
"original_length": len(text),
"chunks": len(chunks)
}
}
max_length = args.get("max_length", MAX_DIRECT_SUMMARY_LENGTH) # compare_documents
return summarize_document(text, max_length) if name == "compare_documents":
text1 = args.get("text1")
text2 = args.get("text2")
if not text1 or not text2:
raise ValueError("text1 and text2 are required")
final_output, chunks, intermediate_summaries = compare_texts_with_chunking(text1, text2)
doc_id = generate_doc_id()
store_document(doc_id, len(text1) + len(text2), chunks, intermediate_summaries, final_output, name)
return {
"doc_id": doc_id,
"tool": name,
"result": final_output,
"metadata": {
"original_length_1": len(text1),
"original_length_2": len(text2),
"chunks": len(chunks)
}
}
# retrieve_document_data
if name == "retrieve_document_data":
doc_id = args.get("doc_id")
if not doc_id:
raise ValueError("doc_id is required")
doc = get_document(doc_id)
if not doc:
raise ValueError("Document not found or expired")
# Return metadata + final_output + intermediate_summaries (chunks on demand if needed)
return {
"doc_id": doc_id,
"tool_used": doc["tool_used"],
"final_output": doc["final_output"],
"intermediate_summaries": doc["intermediate_summaries"],
"metadata": {
"text_length": doc["text_length"],
"chunks_count": doc["chunks_count"],
"created_at": doc["created_at"]
}
}
# query_stored_document
if name == "query_stored_document":
doc_id = args.get("doc_id")
question = args.get("question")
if not doc_id or not question:
raise ValueError("doc_id and question are required")
doc = get_document(doc_id)
if not doc:
raise ValueError("Document not found or expired")
answer = query_chunks(doc["chunks"], question)
return {
"doc_id": doc_id,
"question": question,
"answer": answer
}
# clear_document_cache
if name == "clear_document_cache":
DOCUMENT_STORE.clear()
return {"status": "ok", "message": "Document cache cleared."}
raise ValueError(f"Unknown tool: {name}") raise ValueError(f"Unknown tool: {name}")
def main():
    """Start the MCP summary server and serve until interrupted.

    The port is taken from the first command-line argument when given,
    otherwise from the ``PORT`` environment variable (default ``8080``).
    The server binds to all interfaces (``0.0.0.0``).
    """
    port = int(sys.argv[1]) if len(sys.argv) > 1 else int(os.environ.get("PORT", "8080"))

    # Log the effective configuration once at startup.
    logger.info(f"Starting MCP Summary Server on 0.0.0.0:{port}")
    logger.info(f"Auth mode: {'Bearer (API_KEY set)' if API_KEY else 'none (API_KEY not set)'}")
    logger.info(f"LLM URL: {OPENAPI_URL}")
    logger.info(f"Model: {MODEL_NAME}")
    logger.info(f"Cache: max_docs={MAX_STORED_DOCS}, ttl={CACHE_TTL_SECONDS}s")

    server = HTTPServer(("0.0.0.0", port), MCPSummaryHandler)
    try:
        logger.info(f"MCP Summary Server listening on 0.0.0.0:{port}")
        server.serve_forever()
    except KeyboardInterrupt:
        logger.info("Shutting down...")
    finally:
        # Release the listening socket on every exit path, not only Ctrl+C.
        server.server_close()
+4
View File
@@ -0,0 +1,4 @@
# requirements.txt for MCP Summary Server
# HTTP requests for LLM communication
requests>=2.31.0