diff --git a/Cargo.toml b/Cargo.toml index 1dc758e..42d524f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,12 @@ once_cell = "1.20" # Command line argument parsing clap = { version = "4.5", features = ["derive", "env"] } +# HTTP server for HTML interface +axum = { version = "0.7", features = ["ws", "json"] } +tower-http = { version = "0.5", features = ["cors"] } +hyper = { version = "1.4", features = ["full"] } +tokio-tungstenite = "0.21" + # Optional external tool support headless_chrome = { version = "1.0", optional = true } wkhtmltopdf = { version = "0.4", optional = true } @@ -102,6 +108,7 @@ wkhtmltopdf = { version = "0.4", optional = true } [features] default = ["embedded-fonts", "pure-rust-pdf"] runtime-server = [] +http-server = [] advanced-docx = [] embedded-fonts = [] pure-rust-pdf = [] diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..cb4d9a7 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,338 @@ +# docx-mcp Server - Deployment Guide + +## Server Architecture + +This MCP server supports: +- **stdio mode** (default): stdin/stdout for MCP clients. +- **HTTP mode**: Web interface for HTML/browser access over LAN. +- **Templates directory**: User-provided .docx templates for reuse and fill-in generation. +- **High-fidelity PDF conversion**: Via LibreOffice (included in Docker image). 
+ +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ DEPLOYMENT MODES │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Mode 1: stdio (Local MCP Clients) │ +│ ┌───────────┐ stdio ┌──────────────────┐ │ +│ │ MCP │ ◄────────► │ docx-mcp │ │ +│ │ Client │ │ (container) │ │ +│ └───────────┘ └──────────────────┘ │ +│ │ +│ Mode 2: HTTP (HTML Interface - LAN) │ +│ ┌───────────┐ HTTP:3000 ┌──────────────────┐ │ +│ │ Browser │ ◄────────────►│ docx-mcp │ │ +│ │ (HTML) │ │ (container) │ │ +│ └───────────┘ └──────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +## Docker Image + +There is now a single, unified Dockerfile that includes: +- HTTP server (HTML interface) +- stdio MCP transport +- LibreOffice (high-fidelity PDF conversion) +- Templates directory support +- Sandboxed, non-root configuration + +Build: + +```bash +docker build -t docx-mcp:full . +``` + +## Deployment + +### HTTP Mode (HTML Interface - LAN) + +Run the HTTP server with templates and output directories mounted: + +```bash +docker run --rm \ + --name docx-mcp-http \ + -p 3000:3000 \ + -v /host/path/templates:/templates:ro \ + -v /host/path/output:/out \ + -e DOCX_MCP_HTTP=true \ + -e DOCX_MCP_HTTP_ADDRESS=0.0.0.0:3000 \ + -e DOCX_MCP_TEMPLATES_DIR=/templates \ + -e DOCX_MCP_MAX_SIZE=104857600 \ + -e DOCX_MCP_MAX_DOCS=30 \ + --memory 1g \ + --cpus 1.5 \ + docx-mcp:full +``` + +Access: +- HTML Interface: http://your-server-ip:3000 +- API: http://your-server-ip:3000/api/tools +- WebSocket: ws://your-server-ip:3000/ws + +### stdio Mode (for MCP Clients) + +Useful when launched by an MCP client (e.g., Claude Desktop, Cursor). 
+ +```bash +docker run --rm -i \ + --name docx-mcp-stdio \ + -v /host/path/templates:/templates:ro \ + -v /host/path/output:/out \ + -e DOCX_MCP_TEMPLATES_DIR=/templates \ + -e DOCX_MCP_MAX_SIZE=104857600 \ + -e DOCX_MCP_MAX_DOCS=30 \ + --memory 1g \ + --cpus 1.5 \ + docx-mcp:full +``` + +In the MCP client config, set "command" to "docker" and pass "run" followed by these flags as separate "args" entries; `-i` is required so the container's stdin stays open for the stdio transport. + +## Server Configuration + +### Command Line Arguments + +| Argument | Environment Variable | Description | +|----------|---------------------|-------------| +| `--http-mode` | `DOCX_MCP_HTTP=true` | Enable HTTP server mode | +| `--http-address` | `DOCX_MCP_HTTP_ADDRESS` | HTTP server address (default: 0.0.0.0:3000) | +| `--templates-dir` | `DOCX_MCP_TEMPLATES_DIR` | Directory with template .docx files (default: /templates) | +| `--readonly` | `DOCX_MCP_READONLY=true` | Enable readonly mode | +| `--sandbox` | `DOCX_MCP_SANDBOX=true` | Enable sandbox mode | +| `--no-external-tools` | `DOCX_MCP_NO_EXTERNAL_TOOLS=true` | Disable external tools (e.g., LibreOffice) | +| `--no-network` | `DOCX_MCP_NO_NETWORK=true` | Disable network operations | +| `--max-size` | `DOCX_MCP_MAX_SIZE` | Max document size in bytes | +| `--max-docs` | `DOCX_MCP_MAX_DOCS` | Max concurrent open documents | +| `--whitelist` | `DOCX_MCP_WHITELIST` | Allowed tools (comma-separated) | +| `--blacklist` | `DOCX_MCP_BLACKLIST` | Blocked tools (comma-separated) | + +### Example Configurations + +- HTTP mode with templates: + +```bash +docker run --rm \ + -p 3000:3000 \ + -v /host/path/templates:/templates:ro \ + -e DOCX_MCP_HTTP=true \ + -e DOCX_MCP_TEMPLATES_DIR=/templates \ + docx-mcp:full +``` + +- Readonly HTTP mode (limited tools): + +```bash +docker run --rm \ + -p 3000:3000 \ + -e DOCX_MCP_HTTP=true \ + -e DOCX_MCP_READONLY=true \ + -e DOCX_MCP_WHITELIST="list_templates,open_template,extract_text,get_metadata,search_text" \ + docx-mcp:full +``` + +## API Endpoints + +### HTML Interface +- GET / — Web interface (tool browser + templates 
panel) + +### REST API +- GET /api/tools — List available tools +- POST /api/call — Call a tool + +### WebSocket +- WS /ws — Real-time communication + +### API Examples + +- List tools: + +```bash +curl http://localhost:3000/api/tools +``` + +- Call a tool: + +```bash +curl -X POST http://localhost:3000/api/call \ + -H "Content-Type: application/json" \ + -d '{ + "name": "create_document", + "arguments": {} + }' +``` + +- List templates: + +```bash +curl -X POST http://localhost:3000/api/call \ + -H "Content-Type: application/json" \ + -d '{ + "name": "list_templates", + "arguments": {} + }' +``` + +- Open a template: + +```bash +curl -X POST http://localhost:3000/api/call \ + -H "Content-Type: application/json" \ + -d '{ + "name": "open_template", + "arguments": { "name": "nda_template.docx" } + }' +``` + +- Generate from template with fill-in fields: + +```bash +curl -X POST http://localhost:3000/api/call \ + -H "Content-Type: application/json" \ + -d '{ + "name": "generate_from_template", + "arguments": { + "template_name": "nda_template.docx", + "output_path": "/out/nda_acme.docx", + "fields": { + "CLIENT_NAME": "Acme Corp", + "EFFECTIVE_DATE": "2025-11-09" + } + } + }' +``` + +## Docker Compose (Production) + +Example with HTTP mode, templates, and output volumes: + +```yaml +version: '3.8' + +services: + docx-mcp: + image: docx-mcp:full + build: + context: . 
+ dockerfile: Dockerfile + read_only: true + cap_drop: + - ALL + tmpfs: + - /tmp/docx-mcp:rw,noexec,nosuid,size=200m + volumes: + - ./templates:/templates:ro + - ./output:/out + ports: + - "3000:3000" + environment: + - RUST_LOG=info + - DOCX_MCP_HTTP=true + - DOCX_MCP_HTTP_ADDRESS=0.0.0.0:3000 + - DOCX_MCP_TEMPLATES_DIR=/templates + - DOCX_MCP_MAX_SIZE=104857600 + - DOCX_MCP_MAX_DOCS=30 + deploy: + resources: + limits: + memory: 1G + cpus: '1.5' + restart: unless-stopped + healthcheck: + test: ["CMD", "/usr/local/bin/docx-mcp", "--version"] + interval: 30s + timeout: 5s + retries: 3 +``` + +## Security Configuration + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `DOCX_MCP_HTTP` | `false` | Enable HTTP mode | +| `DOCX_MCP_HTTP_ADDRESS` | `0.0.0.0:3000` | HTTP server address | +| `DOCX_MCP_TEMPLATES_DIR` | `/templates` | Templates directory | +| `DOCX_MCP_READONLY` | `false` | Restrict to read-only operations | +| `DOCX_MCP_SANDBOX` | `true` | Restrict file operations to temp | +| `DOCX_MCP_NO_EXTERNAL_TOOLS` | `true` | Disable external tools | +| `DOCX_MCP_NO_NETWORK` | `true` | Disable network access | +| `DOCX_MCP_MAX_SIZE` | `104857600` | Max document size (bytes) | +| `DOCX_MCP_MAX_DOCS` | `30` | Max concurrent documents | +| `DOCX_MCP_WHITELIST` | - | Allowed tools (comma-separated) | +| `DOCX_MCP_BLACKLIST` | - | Blocked tools (comma-separated) | + +### Security Profiles + +- Readonly HTTP mode: + +```bash +docker run --rm \ + -p 3000:3000 \ + -e DOCX_MCP_HTTP=true \ + -e DOCX_MCP_READONLY=true \ + -e DOCX_MCP_WHITELIST="list_templates,open_template,extract_text,get_metadata,search_text" \ + docx-mcp:full +``` + +- Maximum security: + +```bash +docker run --rm \ + -p 3000:3000 \ + --read-only \ + --cap-drop ALL \ + --tmpfs /tmp/docx-mcp \ + -e DOCX_MCP_HTTP=true \ + -e DOCX_MCP_READONLY=true \ + -e DOCX_MCP_SANDBOX=true \ + -e DOCX_MCP_NO_EXTERNAL_TOOLS=true \ + -e DOCX_MCP_NO_NETWORK=true \ + 
docx-mcp:full +``` + +## Monitoring + +```bash +# View logs +docker logs -f docx-mcp-http + +# Check resource usage +docker stats docx-mcp-http + +# Verify security +docker inspect --format='{{.HostConfig.ReadOnly}}' docx-mcp-http # Should be true +``` + +## Troubleshooting + +### Common Issues + +1. Port already in use: + - Use a different port: + - -p 8080:8080 -e DOCX_MCP_HTTP_ADDRESS=0.0.0.0:8080 + +2. Permission denied on temp directory: + - Ensure temp directory is writable: + - --tmpfs /tmp/docx-mcp:rw + +3. Out of memory: + - Increase memory: + - --memory 2g + +4. CORS issues in browser: + - CORS is enabled for all origins on LAN by default. + - For production, restrict to specific origins as needed. + +## API Key + +No API key is required. Security relies on: +- OS-level access controls +- Container isolation +- Built-in command security (whitelist/blacklist) + +For LAN deployments, rely on: +- Network-level access controls +- Firewall rules +- Application-level authentication at the bridge diff --git a/DEPLOYMENT_QUICKREF.md b/DEPLOYMENT_QUICKREF.md new file mode 100644 index 0000000..40ebb5c --- /dev/null +++ b/DEPLOYMENT_QUICKREF.md @@ -0,0 +1,153 @@ +# docx-mcp Server - Deployment Quick Reference + +## Key Facts + +| Item | Value | +|------|-------| +| **Transport Method** | stdio (stdin/stdout) | +| **Network Port** | Not required for local use | +| **API Key** | Not required | +| **Authentication** | OS-level + container security | + +--- + +## Port Requirements + +### Local Deployment (Recommended) +**No port required** - the server communicates via stdin/stdout directly. + +### Remote Deployment (Optional) +If remote access is needed, wrap with a stdio-to-network bridge: + +| Bridge Type | Port | Protocol | +|-------------|------|----------| +| WebSocket | 8080 | ws:// | +| TCP | 8080 | tcp:// | + +--- + +## Quick Start + +### Build +```bash +# Minimal (recommended) +docker build -f Dockerfile.sandboxed -t docx-mcp:sandboxed . 
+ +# With LibreOffice (better PDF conversion) +docker build -f Dockerfile.libreoffice -t docx-mcp:libreoffice . +``` + +### Run (Local) +```bash +docker run --rm -i \ + --name docx-mcp \ + --read-only \ + --cap-drop ALL \ + --tmpfs /tmp/docx-mcp \ + --memory 512m \ + docx-mcp:sandboxed +``` + +### Run (Remote via Docker Compose) +```bash +docker-compose up -d +``` + +--- + +## MCP Client Configuration + +### Claude Desktop +```json +{ + "mcpServers": { + "docx": { + "command": "docker", + "args": [ + "run", "-i", "--rm", "--read-only", "--cap-drop", "ALL", + "--tmpfs", "/tmp/docx-mcp", "--memory", "512m", + "docx-mcp:sandboxed" + ] + } + } +} +``` + +### Cursor +```json +{ + "mcp": { + "servers": { + "docx": { + "command": "docker", + "args": [ + "run", "-i", "--rm", "--read-only", "--cap-drop", "ALL", + "--tmpfs", "/tmp/docx-mcp", "--memory", "512m", + "docx-mcp:sandboxed" + ] + } + } + } +} +``` + +--- + +## Security Profiles + +### Readonly Mode +```bash +docker run --rm \ + -e DOCX_MCP_READONLY=true \ + -e DOCX_MCP_WHITELIST="open_document,extract_text,get_metadata,search_text" \ + docx-mcp:sandboxed +``` + +### Maximum Security +```bash +docker run --rm \ + --read-only \ + --cap-drop ALL \ + --network none \ + --tmpfs /tmp/docx-mcp \ + -e DOCX_MCP_READONLY=true \ + -e DOCX_MCP_SANDBOX=true \ + -e DOCX_MCP_NO_EXTERNAL_TOOLS=true \ + -e DOCX_MCP_NO_NETWORK=true \ + docx-mcp:sandboxed +``` + +--- + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `DOCX_MCP_READONLY` | `false` | Restrict to read-only operations | +| `DOCX_MCP_SANDBOX` | `true` | Restrict file operations to temp | +| `DOCX_MCP_NO_EXTERNAL_TOOLS` | `true` | Disable LibreOffice etc. 
| +| `DOCX_MCP_NO_NETWORK` | `true` | Disable network access | +| `DOCX_MCP_MAX_SIZE` | `52428800` | Max document size (bytes) | +| `DOCX_MCP_MAX_DOCS` | `20` | Max concurrent documents | +| `DOCX_MCP_WHITELIST` | - | Allowed tools (comma-separated) | +| `DOCX_MCP_BLACKLIST` | - | Blocked tools (comma-separated) | + +--- + +## Files Created + +| File | Description | +|------|-------------| +| `Dockerfile.sandboxed` | Minimal security-focused image | +| `Dockerfile.libreoffice` | Full features with LibreOffice | +| `docker-compose.yml` | Production deployment config | +| `DEPLOYMENT.md` | Comprehensive deployment guide | + +--- + +## Summary + +- **Port Required:** No (for local) / 8080 (for remote with bridge) +- **API Key:** No +- **Authentication:** Container isolation + OS controls +- **Recommended:** Local stdio transport with security features enabled diff --git a/Dockerfile b/Dockerfile index b84d69b..36b1df1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,17 @@ -# Multi-stage Docker build for docx-mcp -FROM rust:1.75-slim as builder +# Unified Dockerfile for docx-mcp +# Features: +# - HTTP mode (HTML interface) + stdio mode +# - LibreOffice for high-fidelity PDF conversion +# - Templates directory support +# - Sandboxed, non-root, read-only filesystem where possible -# Install system dependencies for building -RUN apt-get update && apt-get install -y \ +# ============================================================ +# Build Stage +# ============================================================ +FROM rust:1.80-slim AS builder + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ pkg-config \ libssl-dev \ libfontconfig1-dev \ @@ -12,26 +21,26 @@ RUN apt-get update && apt-get install -y \ build-essential \ && rm -rf /var/lib/apt/lists/* -# Set working directory WORKDIR /app -# Copy manifests -COPY Cargo.toml Cargo.lock ./ -COPY build.rs ./ - -# Copy source code +# Copy manifests and source +COPY Cargo.toml 
Cargo.lock build.rs ./ COPY src/ ./src/ -COPY benches/ ./benches/ -COPY tests/ ./tests/ +COPY assets/ ./assets/ -# Build the application -RUN cargo build --release --all-features +# Build with all key features enabled: +# - runtime-server: stdio MCP transport +# - http-server: HTTP + HTML interface +# - advanced-docx: advanced document operations +RUN cargo build --release --features "runtime-server http-server advanced-docx" -# Runtime stage -FROM debian:bookworm-slim +# ============================================================ +# Runtime Stage +# ============================================================ +FROM debian:bookworm-slim AS runtime -# Install runtime dependencies -RUN apt-get update && apt-get install -y \ +# Install runtime dependencies (including LibreOffice for better PDF conversion) +RUN apt-get update && apt-get install -y --no-install-recommends \ libssl3 \ libfontconfig1 \ libfreetype6 \ @@ -45,33 +54,40 @@ RUN apt-get update && apt-get install -y \ # Create non-root user RUN groupadd -r docxmcp && useradd -r -g docxmcp -s /bin/bash -d /app docxmcp -# Create app directory and set ownership WORKDIR /app RUN chown -R docxmcp:docxmcp /app -# Copy the built binary from builder stage +# Copy binary from builder COPY --from=builder /app/target/release/docx-mcp /usr/local/bin/docx-mcp RUN chmod +x /usr/local/bin/docx-mcp -# Copy additional files if needed -COPY README.md LICENSE ./ +# Create working directories +RUN mkdir -p /tmp/docx-mcp /templates /out && \ + chown -R docxmcp:docxmcp /tmp/docx-mcp /templates /out # Switch to non-root user USER docxmcp -# Create temp directory for document processing -RUN mkdir -p /tmp/docx-mcp && chmod 755 /tmp/docx-mcp +# Expose HTTP port (used when running in HTTP mode) +EXPOSE 3000 -# Expose default MCP port (though MCP typically uses stdin/stdout) -EXPOSE 8080 +# Health check (checks binary is present and executable) +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD 
/usr/local/bin/docx-mcp --version -# Health check -HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD /usr/local/bin/docx-mcp --version || exit 1 - -# Set environment variables +# Default environment: +# - HTTP disabled by default (use stdio mode). +# - Enable via DOCX_MCP_HTTP=true or --http-mode. ENV RUST_LOG=info -ENV DOCX_MCP_TEMP_DIR=/tmp/docx-mcp +ENV DOCX_MCP_TEMP=/tmp/docx-mcp +ENV DOCX_MCP_HTTP=false +ENV DOCX_MCP_HTTP_ADDRESS=0.0.0.0:3000 +ENV DOCX_MCP_TEMPLATES_DIR=/templates +ENV DOCX_MCP_MAX_SIZE=104857600 +ENV DOCX_MCP_MAX_DOCS=30 -# Default command -CMD ["/usr/local/bin/docx-mcp"] \ No newline at end of file +ENTRYPOINT ["/usr/local/bin/docx-mcp"] + +# Default: stdio mode (for MCP clients). +# To run in HTTP mode, override CMD or set DOCX_MCP_HTTP=true. +CMD [] diff --git a/README.md b/README.md index 961fb14..b095f77 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ A comprehensive Model Context Protocol (MCP) server for Microsoft Word DOCX file ## 📖 Table of Contents - [Quick Start](#-quick-start) +- [Transport Modes](#-transport-modes) - [AI Tool Integration](#-ai-tool-integration) - [Claude Desktop](#claude-desktop) - [Cursor](#cursor) @@ -58,6 +59,7 @@ The server will automatically use external tools if available for enhanced quali The server includes comprehensive security features for enterprise and restricted environments: ### Readonly Mode + ```bash # Enable readonly mode - only allows document viewing and analysis @@ -77,6 +79,7 @@ In readonly mode, only these operations are allowed: - Get document metadata and statistics ### Command Filtering + ```bash # Whitelist specific commands only @@ -96,6 +99,7 @@ export DOCX_MCP_BLACKLIST="save_document,convert_to_pdf,merge_documents" ``` ### Sandbox Mode + ```bash # Restrict all file operations to temp directory only @@ -108,6 +112,7 @@ export DOCX_MCP_SANDBOX=true ``` ### Resource Limits + ```bash # Set maximum document size (100MB default) @@ -126,6 +131,37 @@ 
export DOCX_MCP_NO_NETWORK=true --no-network ``` +## 🌐 Transport Modes + +The server supports two transport modes: + +- **stdio (default)**: + - Communicates over stdin/stdout. + - Ideal for MCP clients (Claude Desktop, Cursor, etc.). + +- **HTTP (for HTML/browser access over LAN)**: + - Provides: + - A web interface for browsing and using tools. + - A REST API for programmatic access. + - A WebSocket endpoint for real-time communication. + +To enable HTTP mode: + +```bash +# Using command line +./target/release/docx-mcp --http-mode --http-address 0.0.0.0:3000 + +# Using environment variables +export DOCX_MCP_HTTP=true +export DOCX_MCP_HTTP_ADDRESS=0.0.0.0:3000 +./target/release/docx-mcp +``` + +Access: +- HTML Interface: http://your-server-ip:3000 +- API: http://your-server-ip:3000/api/tools +- WebSocket: ws://your-server-ip:3000/ws + ## 🤖 AI Tool Integration ### Claude Desktop @@ -150,6 +186,7 @@ Add to your Claude Desktop configuration file: ``` **With Security Options (using command-line arguments):** + ```json { "mcpServers": { @@ -165,6 +202,7 @@ Add to your Claude Desktop configuration file: ``` **With Security Options (using environment variables):** + ```json { "mcpServers": { @@ -193,6 +231,7 @@ After adding, restart Claude Desktop. 
You can then ask Claude to: Add to your Cursor settings (`~/.cursor/config.json` or through Settings UI): **Basic Configuration:** + ```json { "mcp": { @@ -210,6 +249,7 @@ Add to your Cursor settings (`~/.cursor/config.json` or through Settings UI): ``` **With Security Options (using command-line arguments):** + ```json { "mcp": { @@ -227,6 +267,7 @@ Add to your Cursor settings (`~/.cursor/config.json` or through Settings UI): ``` **With Security Options (using environment variables):** + ```json { "mcp": { @@ -250,6 +291,7 @@ Add to your Cursor settings (`~/.cursor/config.json` or through Settings UI): Add to your Windsurf configuration (`~/.windsurf/config.json`): **Basic Configuration:** + ```json { "mcp": { @@ -267,6 +309,7 @@ Add to your Windsurf configuration (`~/.windsurf/config.json`): ``` **With Security Options (using arguments):** + ```json { "mcp": { @@ -288,6 +331,7 @@ Add to your Windsurf configuration (`~/.windsurf/config.json`): Add to your Continue configuration (`~/.continue/config.json`): **Basic Configuration:** + ```json { "models": [ @@ -306,6 +350,7 @@ Add to your Continue configuration (`~/.continue/config.json`): ``` **With Security Options:** + ```json { "models": [ @@ -328,6 +373,7 @@ Add to your Continue configuration (`~/.continue/config.json`): If using the MCP extension for VS Code, add to your workspace settings (`.vscode/settings.json`): **Basic Configuration:** + ```json { "mcp.servers": { @@ -343,6 +389,7 @@ If using the MCP extension for VS Code, add to your workspace settings (`.vscode ``` **With Security Options:** + ```json { "mcp.servers": { @@ -369,6 +416,9 @@ docx-mcp --help | Argument | Environment Variable | Description | Example | |----------|---------------------|-------------|---------| +| `--http-mode` | `DOCX_MCP_HTTP=true` | Enable HTTP server mode (HTML interface) | `--http-mode` | +| `--http-address` | `DOCX_MCP_HTTP_ADDRESS` | HTTP server address (default: 0.0.0.0:3000) | `--http-address 0.0.0.0:3000` | +| 
`--templates-dir` | `DOCX_MCP_TEMPLATES_DIR` | Directory with template .docx files (default: /templates) | `--templates-dir /path/to/templates` | | `--readonly` | `DOCX_MCP_READONLY=true` | Enable readonly mode - only viewing operations | `--readonly` | | `--whitelist ` | `DOCX_MCP_WHITELIST` | Comma-separated list of allowed commands | `--whitelist open_document,extract_text` | | `--blacklist ` | `DOCX_MCP_BLACKLIST` | Comma-separated list of forbidden commands | `--blacklist save_document,convert_to_pdf` | @@ -383,9 +433,12 @@ docx-mcp --help ### Example Usage ```bash -# Basic usage +# Basic usage (stdio mode) ./target/release/docx-mcp +# HTTP mode for HTML interface +./target/release/docx-mcp --http-mode --http-address 0.0.0.0:3000 + # Readonly mode with size limit ./target/release/docx-mcp --readonly --max-size 10485760 @@ -428,6 +481,37 @@ docx-mcp --help - **Multiple Documents**: Handle multiple documents simultaneously - **Temp File Management**: Automatic cleanup of temporary files +### Templates and Fill-in Generation + +The server supports a templates directory for reusable document templates: + +- **Templates directory**: + - Configurable via `--templates-dir` or `DOCX_MCP_TEMPLATES_DIR`. + - Intended to be mounted in Docker (e.g., `/templates`). +- **Template tools**: + - `list_templates`: Lists all `.docx` templates available. + - `open_template`: Opens a template by name as a working document. + - `generate_from_template`: Generates a new document from a template with fill-in fields. +- **Fill-in fields**: + - Use placeholders like `{{CLIENT_NAME}}` in your template. + - Provide field values as key-value pairs when calling `generate_from_template`. 
+ +Example: + +```json +{ + "name": "generate_from_template", + "arguments": { + "template_name": "nda_template.docx", + "output_path": "/out/nda_acme.docx", + "fields": { + "CLIENT_NAME": "Acme Corp", + "EFFECTIVE_DATE": "2025-11-09" + } + } +} +``` + ### Professional Templates - **Business Letters**: Professional correspondence with proper formatting - **Resumes**: Modern resume layouts with sections for experience, education, skills @@ -664,6 +748,46 @@ Saves the document to a specified path. } ``` +### Template Tools + +#### `list_templates` +Lists all available templates in the configured templates directory. +```json +{ + "tool": "list_templates", + "arguments": {} +} +``` + +#### `open_template` +Opens a template document by name from the templates directory. +```json +{ + "tool": "open_template", + "arguments": { + "name": "nda_template.docx" + } +} +``` + +#### `generate_from_template` +Generates a new document from a template by filling placeholders like `{{FIELD_NAME}}` with provided values. + +Example: +```json +{ + "tool": "generate_from_template", + "arguments": { + "template_name": "nda_template.docx", + "output_path": "/out/nda_acme.docx", + "fields": { + "CLIENT_NAME": "Acme Corp", + "EFFECTIVE_DATE": "2025-11-09" + } + } +} +``` + ### Content Addition #### `add_paragraph` @@ -784,6 +908,7 @@ Finds and replaces text in the document. ## Example Workflows ### Creating a Report + ```javascript // 1. Create a new document const doc = await mcp.call("create_document", {}); @@ -820,6 +945,7 @@ await mcp.call("convert_to_pdf", { ``` ### Batch Processing Documents + ```javascript // Open and convert multiple documents const documents = ["doc1.docx", "doc2.docx", "doc3.docx"]; @@ -850,6 +976,23 @@ for (const docPath of documents) { } ``` +### Using Templates + +```javascript +// 1. List available templates +const templates = await mcp.call("list_templates", {}); + +// 2. 
Generate a new document from a template with fill-in fields +const result = await mcp.call("generate_from_template", { + template_name: "nda_template.docx", + output_path: "/out/nda_acme.docx", + fields: { + "CLIENT_NAME": "Acme Corp", + "EFFECTIVE_DATE": "2025-11-09" + } +}); +``` + ## Architecture The server is built with a modular architecture: @@ -858,20 +1001,24 @@ The server is built with a modular architecture: - **`docx_handler.rs`**: Core DOCX manipulation logic - **`converter.rs`**: PDF and image conversion functionality - **`docx_tools.rs`**: MCP tool definitions and handlers +- **`http_server.rs`**: HTTP server and HTML interface for LAN access ## Development ### Building from Source + ```bash cargo build ``` ### Running Tests + ```bash cargo test ``` ### Debug Mode + ```bash RUST_LOG=debug cargo run ``` @@ -1045,4 +1192,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file - Built with the official [MCP Rust SDK](https://github.com/modelcontextprotocol/rust-sdk) - Uses [docx-rs](https://github.com/bokuweb/docx-rs) for DOCX manipulation - PDF generation with [printpdf](https://github.com/fschutt/printpdf) -- Image processing with [image-rs](https://github.com/image-rs/image) \ No newline at end of file +- Image processing with [image-rs](https://github.com/image-rs/image) diff --git a/assets/html_interface.html b/assets/html_interface.html new file mode 100644 index 0000000..9628381 --- /dev/null +++ b/assets/html_interface.html @@ -0,0 +1,553 @@ + + + + + + DOCX MCP Server - Web Interface + + + +
+

DOCX MCP Server

+

Word Document Processing Interface

+
+ +
+
+

Templates

+
+

Loading templates...

+
+
+ +
+

Available Tools

+
+
+

Loading tools...

+
+
+
+ + + + +
+ +
+ Connecting... +
+ + + + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b8bbf9a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,64 @@ +version: '3.8' + +# Production deployment for docx-mcp server +# This creates a sandboxed environment with optional WebSocket bridge for remote access + +services: + # WebSocket bridge for remote access (optional) + # Comment out this service if using local stdio transport only + websockify: + image: websockify/websockify + ports: + - "8080:8080" + depends_on: + - docx-mcp + command: ["--web", "/dev/null", "8080", "docx-mcp:8080"] + networks: + - docx-network + restart: unless-stopped + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "8080"] + interval: 30s + timeout: 5s + retries: 3 + + # Main docx-mcp server + docx-mcp: + image: docx-mcp:sandboxed + build: + context: . + dockerfile: Dockerfile.sandboxed + read_only: true + cap_drop: + - ALL + cap_add: + - NET_BIND_SERVICE # Only if exposing port for bridge + tmpfs: + - /tmp/docx-mcp:rw,noexec,nosuid,size=100m + deploy: + resources: + limits: + memory: 512M + cpus: '1.0' + replicas: 1 + environment: + - RUST_LOG=info + - DOCX_MCP_SANDBOX=true + - DOCX_MCP_NO_EXTERNAL_TOOLS=true + - DOCX_MCP_NO_NETWORK=true + - DOCX_MCP_MAX_SIZE=52428800 + - DOCX_MCP_MAX_DOCS=20 + networks: + - docx-network + ports: + - "8080:8080" # Only needed for WebSocket bridge + restart: unless-stopped + healthcheck: + test: ["CMD", "/usr/local/bin/docx-mcp", "--version"] + interval: 30s + timeout: 5s + retries: 3 + +networks: + docx-network: + driver: bridge diff --git a/src/docx_tools.rs b/src/docx_tools.rs index 2cdc868..b5f3551 100644 --- a/src/docx_tools.rs +++ b/src/docx_tools.rs @@ -20,6 +20,7 @@ pub struct DocxToolsProvider { advanced: Arc, security: Arc, security_config: SecurityConfig, + templates_dir: PathBuf, } impl DocxToolsProvider { @@ -28,6 +29,10 @@ impl DocxToolsProvider { } pub fn new_with_security(security_config: SecurityConfig) -> Self { + 
Self::new_with_security_and_templates(security_config, PathBuf::from("/templates")) + } + + pub fn new_with_security_and_templates(security_config: SecurityConfig, templates_dir: PathBuf) -> Self { Self { handler: Arc::new(RwLock::new(DocxHandler::new().expect("Failed to create DocxHandler"))), converter: Arc::new(DocumentConverter::new()), @@ -35,6 +40,7 @@ impl DocxToolsProvider { advanced: Arc::new(AdvancedDocxHandler::new()), security: Arc::new(SecurityMiddleware::new(security_config.clone())), security_config, + templates_dir, } } @@ -52,6 +58,7 @@ impl DocxToolsProvider { advanced: Arc::new(AdvancedDocxHandler::new()), security: Arc::new(SecurityMiddleware::new(security_config.clone())), security_config, + templates_dir: PathBuf::from("/templates"), } } } @@ -956,6 +963,56 @@ impl DocxToolsProvider { }), annotations: None, }, + Tool { + name: "list_templates".to_string(), + description: Some("List available document templates from the templates directory".to_string()), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + annotations: None, + }, + Tool { + name: "open_template".to_string(), + description: Some("Open a template document by name from the templates directory".to_string()), + input_schema: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Template file name (e.g., 'nda_template.docx')" + } + }, + "required": ["name"] + }), + annotations: None, + }, + Tool { + name: "generate_from_template".to_string(), + description: Some( + "Generate a new document from a template by filling placeholders like {{FIELD_NAME}} with provided values".to_string() + ), + input_schema: json!({ + "type": "object", + "properties": { + "template_name": { + "type": "string", + "description": "Template file name (e.g., 'nda_template.docx')" + }, + "output_path": { + "type": "string", + "description": "Output DOCX path (e.g., '/out/nda_filled.docx')" + }, + "fields": { + "type": "object", + 
"description": "Key-value pairs; keys are placeholder names without braces. Example: {\"CLIENT_NAME\": \"Acme Corp\"}" + } + }, + "required": ["template_name", "output_path", "fields"] + }), + annotations: None, + }, ]; // Filter tools based on security configuration @@ -1715,7 +1772,173 @@ impl DocxToolsProvider { Err(e) => ToolOutcome::Error { code: ErrorCode::InternalError, error: e.to_string(), hint: None }, } }, - + + "list_templates" => { + let mut templates = Vec::new(); + if self.templates_dir.exists() { + if let Ok(entries) = std::fs::read_dir(&self.templates_dir) { + for entry in entries.filter_map(|e| e.ok()) { + let path = entry.path(); + if path.is_file() { + if let Some(ext) = path.extension().and_then(|e| e.to_str()) { + if ext.eq_ignore_ascii_case("docx") { + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + templates.push(name.to_string()); + } + } + } + } + } + } + } + templates.sort(); + ToolOutcome::Metadata { + metadata: serde_json::json!({ "templates": templates }), + } + }, + + "open_template" => { + let name = arguments["name"].as_str().unwrap_or(""); + if name.is_empty() { + ToolOutcome::Error { + code: ErrorCode::ValidationError, + error: "Template name is required".to_string(), + hint: Some("Provide 'name' with a .docx filename from list_templates".to_string()), + } + } else { + let path = self.templates_dir.join(name); + if !path.exists() || !path.is_file() { + ToolOutcome::Error { + code: ErrorCode::ValidationError, + error: format!("Template not found: {}", name), + hint: Some("Check list_templates for available names".to_string()), + } + } else { + let mut handler = self.handler.write().unwrap(); + match handler.open_document(&path) { + Ok(doc_id) => ToolOutcome::Created { + document_id: doc_id, + message: Some(format!("Opened template '{}' as document", name)), + }, + Err(e) => ToolOutcome::Error { + code: ErrorCode::InternalError, + error: e.to_string(), + hint: None, + }, + } + } + } + }, + + 
"generate_from_template" => { + let template_name = arguments["template_name"].as_str().unwrap_or(""); + let output_path = arguments["output_path"].as_str().unwrap_or(""); + let fields = arguments.get("fields").and_then(|v| v.as_object()).cloned().unwrap_or_default(); + + if template_name.is_empty() { + ToolOutcome::Error { + code: ErrorCode::ValidationError, + error: "template_name is required".to_string(), + hint: Some("Provide the template file name from list_templates".to_string()), + } + } else if output_path.is_empty() { + ToolOutcome::Error { + code: ErrorCode::ValidationError, + error: "output_path is required".to_string(), + hint: Some("Provide an absolute path where the generated DOCX will be saved".to_string()), + } + } else { + let template_path = self.templates_dir.join(template_name); + if !template_path.exists() || !template_path.is_file() { + ToolOutcome::Error { + code: ErrorCode::ValidationError, + error: format!("Template not found: {}", template_name), + hint: Some("Check list_templates for available names".to_string()), + } + } else { + // Open template + let mut handler = self.handler.write().unwrap(); + let doc_id = match handler.open_document(&template_path) { + Ok(id) => id, + Err(e) => { + drop(handler); + return ToolOutcome::Error { + code: ErrorCode::InternalError, + error: e.to_string(), + hint: None, + }; + } + }; + + // Apply field replacements + let mut replace_count = 0usize; + for (key, value) in &fields { + let placeholder = format!("{{{{{}}}}}", key); + let val_str = match value { + Value::String(s) => s.clone(), + _ => value.to_string(), + }; + if let Ok(count) = handler.find_and_replace_advanced( + &doc_id, + &placeholder, + &val_str, + false, // case_sensitive: false for placeholders + true, // whole_word: true (treat placeholder as whole token) + false, // use_regex: false + ) { + replace_count += count; + } + } + + // Save generated document + let out_path = PathBuf::from(output_path); + let result = if 
out_path.parent().is_some() {
+                            if let Err(e) = std::fs::create_dir_all(out_path.parent().unwrap()) {
+                                // Keep `handler` alive here: it is still needed to close the template below.
+                                ToolOutcome::Error {
+                                    code: ErrorCode::InternalError,
+                                    error: format!("Failed to create output directory: {}", e),
+                                    hint: None,
+                                }
+                            } else {
+                                match handler.save_document(&doc_id, &out_path) {
+                                    Ok(()) => ToolOutcome::Ok {
+                                        message: Some(format!(
+                                            "Generated document from template '{}' with {} replacements at {}",
+                                            template_name, replace_count, output_path
+                                        )),
+                                    },
+                                    Err(e) => ToolOutcome::Error {
+                                        code: ErrorCode::InternalError,
+                                        error: e.to_string(),
+                                        hint: None,
+                                    },
+                                }
+                            }
+                        } else {
+                            match handler.save_document(&doc_id, &out_path) {
+                                Ok(()) => ToolOutcome::Ok {
+                                    message: Some(format!(
+                                        "Generated document from template '{}' with {} replacements at {}",
+                                        template_name, replace_count, output_path
+                                    )),
+                                },
+                                Err(e) => ToolOutcome::Error {
+                                    code: ErrorCode::InternalError,
+                                    error: e.to_string(),
+                                    hint: None,
+                                },
+                            }
+                        };
+
+                        // Optionally close template document
+                        let _ = handler.close_document(&doc_id);
+                        drop(handler);
+                        result
+                    }
+                }
+            },
+
             _ => {
                 ToolOutcome::Error { code: ErrorCode::UnknownTool, error: format!("Unknown or unsupported tool: {}", name), hint: None }
             }
diff --git a/src/http_server.rs b/src/http_server.rs
new file mode 100644
index 0000000..2c7cafa
--- /dev/null
+++ b/src/http_server.rs
@@ -0,0 +1,198 @@
+//! HTTP and WebSocket front end for the DOCX tools.
+//!
+//! Serves the bundled HTML page and exposes the tools of `DocxToolsProvider`
+//! through `GET /api/tools`, `POST /api/call` and the `/ws` WebSocket route.
+//!
+//! No authentication is performed and CORS is open to every origin, so the
+//! listener should only be reachable from a trusted network.
+
+use axum::{
+    extract::{
+        ws::{Message, WebSocket},
+        State, WebSocketUpgrade,
+    },
+    response::{Html, Response},
+    routing::{get, post},
+    Json, Router,
+};
+use serde::{Deserialize, Serialize};
+use std::{net::SocketAddr, sync::Arc};
+use tower_http::cors::{Any, CorsLayer};
+use tracing::{info, warn};
+
+use crate::docx_tools::DocxToolsProvider;
+
+/// Application state shared across HTTP handlers
+pub struct AppState {
+    /// Tool provider that executes every incoming call
+    pub provider: DocxToolsProvider,
+}
+
+/// Request to call a tool
+#[derive(Debug, Deserialize)]
+pub struct ToolCallRequest {
+    /// Name of the tool to invoke
+    pub name: String,
+    /// Tool arguments; defaults to JSON `null` when omitted
+    #[serde(default)]
+    pub arguments: serde_json::Value,
+}
+
+/// Response from a tool call
+#[derive(Debug, Serialize)]
+pub struct ToolCallResponse {
+    /// `false` when the tool flagged an error or the request could not be parsed
+    pub success: bool,
+    /// First content item of the tool response, or `{}` when there is none
+    pub content: serde_json::Value,
+    /// Short error description; `None` on success
+    pub error: Option<String>,
+}
+
+/// Response with list of tools
+#[derive(Debug, Serialize)]
+pub struct ListToolsResponse {
+    pub success: bool,
+    /// One `{name, description, input_schema}` object per tool
+    pub tools: Vec<serde_json::Value>,
+}
+
+/// Start the HTTP server and serve until it fails or the process exits.
+///
+/// `addr` is parsed as `ip:port`; an unparsable value falls back to
+/// `0.0.0.0:3000` with a warning.
+///
+/// # Errors
+/// Returns an error when the listener cannot be bound or the server stops
+/// with an I/O error.
+pub async fn start_http_server(addr: &str, provider: DocxToolsProvider) -> anyhow::Result<()> {
+    let state = Arc::new(AppState { provider });
+
+    // CORS policy - allow all origins on LAN. Headers must be allowed as well,
+    // otherwise the preflight for a cross-origin JSON POST is rejected.
+    let cors = CorsLayer::new()
+        .allow_origin(Any)
+        .allow_methods(Any)
+        .allow_headers(Any);
+
+    let app = Router::new()
+        // Serve HTML interface
+        .route("/", get(index_handler))
+        .route("/api/tools", get(list_tools_handler))
+        .route("/api/call", post(call_tool_handler))
+        .route("/ws", get(ws_handler))
+        .layer(cors)
+        .with_state(state);
+
+    let addr = addr.parse::<SocketAddr>().unwrap_or_else(|_| {
+        warn!("Invalid address format, using default 0.0.0.0:3000");
+        SocketAddr::from(([0, 0, 0, 0], 3000))
+    });
+
+    info!("Starting HTTP server on {}", addr);
+
+    let listener = tokio::net::TcpListener::bind(addr).await?;
+    axum::serve(listener, app).await?;
+
+    Ok(())
+}
+
+/// Serve the HTML interface
+async fn index_handler() -> Html<&'static str> {
+    Html(include_str!("../assets/html_interface.html"))
+}
+
+/// List available tools
+async fn list_tools_handler(State(state): State<Arc<AppState>>) -> Json<ListToolsResponse> {
+    let tools = state.provider.list_tools().await;
+
+    let tool_list = tools
+        .iter()
+        .map(|t| {
+            serde_json::json!({
+                "name": t.name,
+                "description": t.description,
+                "input_schema": t.input_schema
+            })
+        })
+        .collect();
+
+    Json(ListToolsResponse {
+        success: true,
+        tools: tool_list,
+    })
+}
+
+/// Convert the outcome of a tool call into the JSON shape shared by the HTTP and
+/// WebSocket endpoints.
+///
+/// Only the first content item is reported. Text that parses as JSON is passed
+/// through as JSON; any other text is wrapped as `{"text": ...}`.
+fn to_tool_call_response(
+    first: Option<&mcp_core::types::ToolResponseContent>,
+    is_error: Option<bool>,
+) -> ToolCallResponse {
+    let content = match first {
+        Some(mcp_core::types::ToolResponseContent::Text(text)) => {
+            serde_json::from_str(&text.text)
+                .unwrap_or_else(|_| serde_json::json!({"text": text.text.clone()}))
+        }
+        Some(mcp_core::types::ToolResponseContent::Image(image)) => serde_json::json!({
+            "data": image.data,
+            "mimeType": image.mime_type
+        }),
+        None => serde_json::json!({}),
+    };
+    let failed = is_error.unwrap_or(false);
+
+    ToolCallResponse {
+        success: !failed,
+        content,
+        error: failed.then(|| "Tool call failed".to_string()),
+    }
+}
+
+/// Call a tool via HTTP POST
+async fn call_tool_handler(
+    State(state): State<Arc<AppState>>,
+    Json(request): Json<ToolCallRequest>,
+) -> Json<ToolCallResponse> {
+    let response = state.provider.call_tool(&request.name, request.arguments).await;
+    Json(to_tool_call_response(response.content.first(), response.is_error))
+}
+
+/// WebSocket handler for real-time communication
+async fn ws_handler(ws: WebSocketUpgrade, State(state): State<Arc<AppState>>) -> Response {
+    ws.on_upgrade(move |socket| handle_socket(socket, state))
+}
+
+/// Answer each text frame (a JSON `ToolCallRequest`) with one JSON
+/// `ToolCallResponse` until the peer closes or the connection fails.
+async fn handle_socket(mut socket: WebSocket, state: Arc<AppState>) {
+    // Stop on a receive error rather than polling a connection that has already failed.
+    while let Some(Ok(msg)) = socket.recv().await {
+        let text = match msg {
+            Message::Text(text) => text,
+            Message::Close(_) => break,
+            _ => continue,
+        };
+
+        let reply = match serde_json::from_str::<ToolCallRequest>(&text) {
+            Ok(request) => {
+                let response = state.provider.call_tool(&request.name, request.arguments).await;
+                to_tool_call_response(response.content.first(), response.is_error)
+            }
+            Err(e) => ToolCallResponse {
+                success: false,
+                content: serde_json::json!({}),
+                error: Some(format!("Parse error: {}", e)),
+            },
+        };
+
+        let payload = serde_json::to_string(&reply).unwrap_or_else(|_| "{}".to_string());
+        if socket.send(Message::Text(payload)).await.is_err() {
+            // The peer is gone; no further reply can be delivered.
+            break;
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 21f1603..e795137 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -15,6 +15,8 @@ mod converter;
 mod pure_converter;
 #[cfg(all(feature = "runtime-server", feature = "advanced-docx"))]
 mod advanced_docx;
+#[cfg(feature = "http-server")]
+mod http_server;
 mod security;
 
 #[cfg(feature = "embedded-fonts")]
@@ -53,6 +55,39 @@ async fn main() -> Result<()> {
         }
     }
 
+    // Check if HTTP mode is enabled before consuming args
+    let http_mode = args.http_mode;
+    let http_address = args.http_address.clone();
+    let templates_dir = args.templates_dir.clone();
+
+    // Create the tools provider
+    let security_config = security::SecurityConfig::from_args(args);
+    info!("Starting DOCX MCP Server - Security: {}", security_config.get_summary());
+    info!("Templates directory: {}", templates_dir);
+
+    let provider = DocxToolsProvider::new_with_security_and_templates(
+        security_config,
+        std::path::PathBuf::from(&templates_dir),
+    );
+
+    // Check if HTTP mode is enabled
+    if http_mode {
+        #[cfg(feature = "http-server")]
+        {
+            let addr = http_address.unwrap_or_else(|| "0.0.0.0:3000".to_string());
+            info!("Starting in HTTP mode on {}", addr);
+            return http_server::start_http_server(&addr, provider).await;
+        }
+
+        #[cfg(not(feature = "http-server"))]
+        {
+            eprintln!("HTTP mode requires the 'http-server' feature to be enabled during build.");
+            eprintln!("Rebuild with: cargo build --release --features http-server");
+            return Err(anyhow::anyhow!("HTTP mode not available"));
+        }
+    }
+
+    // Default: stdio mode
     #[cfg(feature = "runtime-server")]
     {
         use
mcp_server::{Router, Server}; @@ -67,9 +102,6 @@ async fn main() -> Result<()> { use std::future::Future; use tokio::io::{stdin, stdout}; - let security_config = security::SecurityConfig::from_args(args); - info!("Starting DOCX MCP Server - Security: {}", security_config.get_summary()); - #[derive(Clone)] struct DocxRouter(docx_tools::DocxToolsProvider); @@ -80,7 +112,6 @@ async fn main() -> Result<()> { CapabilitiesBuilder::new().with_tools(true).build() } fn list_tools(&self) -> Vec { - // DocxToolsProvider::list_tools is async; block briefly with tokio runtime handle let rt = tokio::runtime::Handle::current(); let tools = rt.block_on(self.0.list_tools()); tools.into_iter().map(|t| SpecTool{ name: t.name, description: t.description.unwrap_or_default(), input_schema: t.input_schema }).collect() @@ -90,7 +121,6 @@ async fn main() -> Result<()> { let name = tool_name.to_string(); Box::pin(async move { let resp = provider.call_tool(&name, arguments).await; - // Convert our CallToolResponse (text JSON) to Content::text let text = match resp.content.get(0) { Some(mcp_core::types::ToolResponseContent::Text(t)) => t.text.clone(), _ => serde_json::to_string(&resp).unwrap_or_else(|_| "{}".to_string()), @@ -108,7 +138,7 @@ async fn main() -> Result<()> { } } - let router = DocxRouter(DocxToolsProvider::new_with_security(security_config)); + let router = DocxRouter(provider); let service = RouterService(router); let server = Server::new(service); let transport = mcp_server::ByteTransport::new(stdin(), stdout()); @@ -117,9 +147,8 @@ async fn main() -> Result<()> { #[cfg(not(feature = "runtime-server"))] { - // No runtime server compiled in; if no subcommand was used, exit with guidance eprintln!("Runtime server disabled. 
Rebuild with --features runtime-server to run the MCP server."); } Ok(()) -} \ No newline at end of file +} diff --git a/src/security.rs b/src/security.rs index 7f54638..7a83a70 100644 --- a/src/security.rs +++ b/src/security.rs @@ -42,6 +42,18 @@ pub struct Args { #[arg(long, env = "DOCX_MCP_MAX_DOCS")] pub max_docs: Option, + /// Enable HTTP server mode for HTML interface + #[arg(long, env = "DOCX_MCP_HTTP")] + pub http_mode: bool, + + /// HTTP server address and port (default: 0.0.0.0:3000) + #[arg(long, env = "DOCX_MCP_HTTP_ADDRESS")] + pub http_address: Option, + + /// Path to directory containing template .docx files + #[arg(long, env = "DOCX_MCP_TEMPLATES_DIR", default_value = "/templates")] + pub templates_dir: String, + /// Optional top-level subcommand (e.g., fonts download) #[command(subcommand)] pub command: Option,