Initial Commit

This commit is contained in:
Andy
2025-08-11 14:31:51 +08:00
commit 39e94c1b13
36 changed files with 12517 additions and 0 deletions
+868
View File
@@ -0,0 +1,868 @@
use anyhow::{Context, Result};
use docx_rs::*;
use std::collections::HashMap;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use base64;
/// Advanced DOCX manipulation features.
///
/// This is a field-less unit type: it only groups the document-building
/// methods defined in its `impl` block. Most of those methods take a `Docx`
/// by value and hand back the extended document.
pub struct AdvancedDocxHandler;
impl AdvancedDocxHandler {
pub fn new() -> Self {
Self
}
/// Creates a new document pre-filled with the placeholder layout for
/// `template_type`.
///
/// # Errors
/// Returns whatever error the selected template builder reports.
pub fn create_from_template(&self, template_type: DocumentTemplate) -> Result<Docx> {
    let blank = Docx::new();
    // Each arm delegates to the matching private template builder.
    match template_type {
        DocumentTemplate::BusinessLetter => self.apply_business_letter_template(blank),
        DocumentTemplate::Resume => self.apply_resume_template(blank),
        DocumentTemplate::Report => self.apply_report_template(blank),
        DocumentTemplate::Invoice => self.apply_invoice_template(blank),
        DocumentTemplate::Contract => self.apply_contract_template(blank),
        DocumentTemplate::Memo => self.apply_memo_template(blank),
        DocumentTemplate::Newsletter => self.apply_newsletter_template(blank),
    }
}
/// Appends a table of contents, a grey italic hint telling the reader how
/// to refresh it, and a page break.
pub fn add_table_of_contents(&self, docx: Docx) -> Result<Docx> {
    let contents = TableOfContents::new()
        .heading_text("Table of Contents")
        .heading_style("TOCHeading");

    // Hint shown directly under the table of contents.
    let hint_run = Run::new()
        .add_text("Right-click and select 'Update Field' to refresh the table of contents")
        .italic()
        .size(20)
        .color("808080");
    let hint = Paragraph::new().add_run(hint_run);

    // Content that follows starts on a fresh page.
    let page_break = Paragraph::new().add_run(Run::new().add_break(BreakType::Page));

    Ok(docx
        .add_table_of_contents(contents)
        .add_paragraph(hint)
        .add_paragraph(page_break))
}
/// Add an image to the document
pub fn add_image(
&self,
docx: Docx,
image_data: &[u8],
width_px: u32,
height_px: u32,
alt_text: Option<&str>
) -> Result<Docx> {
// Convert pixels to EMUs (English Metric Units)
// 1 pixel = 9525 EMUs
let width_emu = width_px * 9525;
let height_emu = height_px * 9525;
let drawing = Drawing::new()
.inline(
Inline::new()
.extent(width_emu, height_emu)
.graphic(
Graphic::new()
.graphic_data(
GraphicData::new()
.pic(
Pic::new()
.blip_fill(image_data.to_vec())
)
)
)
);
let paragraph = Paragraph::new()
.add_run(Run::new().add_drawing(drawing));
Ok(docx.add_paragraph(paragraph))
}
/// Appends a chart rendered as a titled table.
///
/// Real DOCX charts need embedded spreadsheet data, so the chart is
/// represented as a table: one header row (`Category` plus each series
/// name) and one row per category. The title paragraph is the `Debug` name
/// of `chart_type` followed by `data.title`, centred.
///
/// A series with fewer values than there are categories gets an empty cell
/// for the missing positions, so every row has the same number of columns
/// and values stay under their own series header.
pub fn add_chart(&self, docx: Docx, chart_type: ChartType, data: ChartData) -> Result<Docx> {
    let mut table = Table::new(vec![]);

    // Header row.
    let mut header_cells = vec![TableCell::new().add_paragraph(
        Paragraph::new().add_run(Run::new().add_text("Category").bold())
    )];
    for series in &data.series {
        header_cells.push(
            TableCell::new().add_paragraph(
                Paragraph::new().add_run(Run::new().add_text(&series.name).bold())
            )
        );
    }
    table = table.add_row(TableRow::new(header_cells));

    // One data row per category.
    for (i, category) in data.categories.iter().enumerate() {
        let mut row_cells = vec![TableCell::new().add_paragraph(
            Paragraph::new().add_run(Run::new().add_text(category))
        )];
        for series in &data.series {
            // Missing values become empty cells instead of being skipped,
            // which would shift later series into the wrong column.
            let cell_text = series
                .values
                .get(i)
                .map(|value| value.to_string())
                .unwrap_or_default();
            row_cells.push(
                TableCell::new().add_paragraph(
                    Paragraph::new().add_run(Run::new().add_text(&cell_text))
                )
            );
        }
        table = table.add_row(TableRow::new(row_cells));
    }

    // Title above the table.
    let title = Paragraph::new()
        .add_run(Run::new().add_text(&format!("{:?}: {}", chart_type, data.title)).bold())
        .align(AlignmentType::Center);
    Ok(docx.add_paragraph(title).add_table(table))
}
/// Appends a paragraph holding one external hyperlink.
///
/// The link text is coloured `0000FF` and given a single underline.
pub fn add_hyperlink(&self, docx: Docx, text: &str, url: &str) -> Result<Docx> {
    let link_run = Run::new()
        .add_text(text)
        .color("0000FF")
        .underline("single");
    let link = Hyperlink::new(url, HyperlinkType::External).add_run(link_run);
    let wrapper = Paragraph::new().add_hyperlink(link);
    Ok(docx.add_paragraph(wrapper))
}
/// Appends a paragraph whose text is wrapped in a named bookmark.
///
/// A freshly generated UUID string is used as the id shared by the start
/// and end markers.
pub fn add_bookmark(&self, docx: Docx, bookmark_name: &str, text: &str) -> Result<Docx> {
    let marker_id = Uuid::new_v4().to_string();
    let body = Paragraph::new()
        .add_bookmark_start(BookmarkStart::new(&marker_id, bookmark_name))
        .add_run(Run::new().add_text(text))
        .add_bookmark_end(BookmarkEnd::new(&marker_id));
    Ok(docx.add_paragraph(body))
}
/// Appends a paragraph containing a `REF <bookmark> \h` field.
///
/// `display_text` is used as the field's default text.
pub fn add_cross_reference(&self, docx: Docx, bookmark_name: &str, display_text: &str) -> Result<Docx> {
    // Cross-references are expressed through a field instruction.
    let instruction = format!("REF {} \\h", bookmark_name);
    let reference = ComplexField::new()
        .instruction(&instruction)
        .default_text(display_text);
    Ok(docx.add_paragraph(Paragraph::new().add_complex_field(reference)))
}
/// Applies title, subject, author, keywords, description and, when present,
/// company and manager to the document.
///
/// Keywords are joined with `", "` into a single string.
///
/// The builder calls consume and return the document, so the optional
/// `company` / `manager` results are re-bound. Previously they were
/// discarded, which dropped those properties.
pub fn set_document_properties(&self, docx: Docx, properties: DocumentProperties) -> Result<Docx> {
    let mut docx = docx
        .title(&properties.title)
        .subject(&properties.subject)
        .creator(&properties.author)
        .keywords(&properties.keywords.join(", "))
        .description(&properties.description);
    if let Some(company) = properties.company {
        docx = docx.company(&company);
    }
    if let Some(manager) = properties.manager {
        docx = docx.manager(&manager);
    }
    Ok(docx)
}
/// Appends a section with the requested page size, orientation, margins
/// and column count.
///
/// * Page dimensions are in twips; landscape swaps width and height.
/// * Margins are given in millimetres and converted at 56.7 twips per mm,
///   truncated to whole twips.
/// * `columns` is only applied when greater than 1.
///
/// The page dimensions are resolved up front and set once. The earlier
/// version tried to read them back from the builder while consuming it.
pub fn add_section(&self, docx: Docx, section_config: SectionConfig) -> Result<Docx> {
    const TWIPS_PER_MM: f32 = 56.7;

    // Portrait (width, height) in twips.
    let (mut width, mut height) = match section_config.page_size {
        PageSize::A4 => (11906, 16838),
        PageSize::Letter => (12240, 15840),
        PageSize::Legal => (12240, 20160),
        PageSize::A3 => (16838, 23811),
    };
    if section_config.landscape {
        std::mem::swap(&mut width, &mut height);
    }
    let mut section = SectionProperty::new().page_size(width, height);

    // Margins: millimetres to twips.
    let margins = &section_config.margins;
    section = section.page_margin(
        PageMargin::new()
            .top((margins.top * TWIPS_PER_MM) as i32)
            .bottom((margins.bottom * TWIPS_PER_MM) as i32)
            .left((margins.left * TWIPS_PER_MM) as i32)
            .right((margins.right * TWIPS_PER_MM) as i32)
            .header((margins.header * TWIPS_PER_MM) as i32)
            .footer((margins.footer * TWIPS_PER_MM) as i32)
    );

    // Single-column layout needs no explicit setting.
    if section_config.columns > 1 {
        section = section.columns(section_config.columns);
    }
    Ok(docx.add_section(section))
}
/// Places `text` in the document header as a large, pale, centred run.
///
/// `Diagonal` uses size 144, colour `C0C0C0` and bold; `Horizontal` uses
/// size 100 and colour `E0E0E0`.
/// NOTE(review): no rotation is applied in either variant here, so confirm
/// whether `Diagonal` is expected to render at an angle.
pub fn add_watermark(&self, docx: Docx, text: &str, style: WatermarkStyle) -> Result<Docx> {
    let base = Run::new().add_text(text);
    let mark = match style {
        WatermarkStyle::Diagonal => base.size(144).color("C0C0C0").bold(),
        WatermarkStyle::Horizontal => base.size(100).color("E0E0E0"),
    };

    // The watermark lives in the header rather than the body.
    let centred = Paragraph::new().add_run(mark).align(AlignmentType::Center);
    let header = Header::new().add_paragraph(centred);
    Ok(docx.header(header))
}
/// Appends a paragraph with `reference_text` and attaches a footnote
/// containing `footnote_text`, linked through a generated UUID id.
pub fn add_footnote(&self, docx: Docx, reference_text: &str, footnote_text: &str) -> Result<Docx> {
    let note_id = Uuid::new_v4().to_string();

    let note_body = Paragraph::new().add_run(Run::new().add_text(footnote_text));
    let note = Footnote::new(&note_id).add_paragraph(note_body);

    let anchor = Paragraph::new()
        .add_run(Run::new().add_text(reference_text))
        .add_footnote_reference(&note_id);

    Ok(docx.add_paragraph(anchor).add_footnote(note))
}
/// Appends a paragraph with `reference_text` and attaches an endnote
/// containing `endnote_text`, linked through a generated UUID id.
pub fn add_endnote(&self, docx: Docx, reference_text: &str, endnote_text: &str) -> Result<Docx> {
    let note_id = Uuid::new_v4().to_string();

    let note_body = Paragraph::new().add_run(Run::new().add_text(endnote_text));
    let note = Endnote::new(&note_id).add_paragraph(note_body);

    let anchor = Paragraph::new()
        .add_run(Run::new().add_text(reference_text))
        .add_endnote_reference(&note_id);

    Ok(docx.add_paragraph(anchor).add_endnote(note))
}
/// Registers a paragraph style built from `style`.
///
/// The style is based on `style.based_on`, falling back to `"Normal"`.
/// Spacing, indent, font, size, bold, italic and colour are applied only
/// when provided or set. `StyleSpacing::line` is not read here.
pub fn add_custom_style(&self, docx: Docx, style: CustomStyle) -> Result<Docx> {
    let parent = style.based_on.unwrap_or_else(|| "Normal".to_string());

    // Paragraph-level formatting.
    let paragraph_property = {
        let mut props = ParagraphProperty::new();
        if let Some(spacing) = style.spacing {
            props = props
                .line_spacing(LineSpacing::new(SpacingType::Auto, spacing.before, spacing.after));
        }
        if let Some(indent) = style.indent {
            props = props
                .indent(Some(indent.left), Some(indent.right), Some(indent.first_line), None);
        }
        props
    };

    // Character-level formatting.
    let run_property = {
        let mut props = RunProperty::new();
        if let Some(font) = style.font {
            props = props.fonts(RunFonts::new().ascii(&font).east_asia(&font));
        }
        if let Some(size) = style.size {
            props = props.size(size);
        }
        if style.bold {
            props = props.bold();
        }
        if style.italic {
            props = props.italic();
        }
        if let Some(color) = style.color {
            props = props.color(&color);
        }
        props
    };

    let definition = Style::new(&style.id, StyleType::Paragraph)
        .name(&style.name)
        .based_on(&parent)
        .paragraph_property(paragraph_property)
        .run_property(run_property);
    Ok(docx.add_style(definition))
}
/// Appends one paragraph per entry in `fields`, each holding a
/// `MERGEFIELD <name> \* MERGEFORMAT` field whose default text is the
/// field name wrapped in guillemets.
pub fn prepare_mail_merge_template(&self, docx: Docx, fields: Vec<String>) -> Result<Docx> {
    let merged = fields.into_iter().fold(docx, |doc, field| {
        let instruction = format!("MERGEFIELD {} \\* MERGEFORMAT", field);
        let placeholder = format!("«{}»", field);
        let merge_field = ComplexField::new()
            .instruction(&instruction)
            .default_text(&placeholder);
        doc.add_paragraph(Paragraph::new().add_complex_field(merge_field))
    });
    Ok(merged)
}
/// Appends a paragraph containing `text` and annotates it with a comment.
///
/// The comment carries `author`, the current UTC time and `comment` as its
/// body. A generated UUID string links the range markers, the reference
/// run and the comment itself.
pub fn add_comment(&self, docx: Docx, text: &str, comment: &str, author: &str) -> Result<Docx> {
    let comment_id = Uuid::new_v4().to_string();

    let annotation = Comment::new(&comment_id, author)
        .date(Utc::now())
        .add_paragraph(Paragraph::new().add_run(Run::new().add_text(comment)));

    // Range start, annotated text, range end, then the reference run.
    let annotated = Paragraph::new()
        .add_comment_range_start(CommentRangeStart::new(&comment_id))
        .add_run(Run::new().add_text(text))
        .add_comment_range_end(CommentRangeEnd::new(&comment_id))
        .add_run(Run::new().add_comment_reference(CommentReference::new(&comment_id)));

    Ok(docx.add_paragraph(annotated).add_comment(annotation))
}
// Template helper methods
fn apply_business_letter_template(&self, mut docx: Docx) -> Result<Docx> {
// Add sender info placeholder
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Your Name]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Your Address]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[City, State ZIP]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Your Email]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Your Phone]"))
);
docx = docx.add_paragraph(Paragraph::new());
// Date
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Date]"))
);
docx = docx.add_paragraph(Paragraph::new());
// Recipient info
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Recipient Name]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Title]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Company]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[Address]"))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_text("[City, State ZIP]"))
);
docx = docx.add_paragraph(Paragraph::new());
// Salutation
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("Dear [Recipient Name]:"))
);
docx = docx.add_paragraph(Paragraph::new());
// Body placeholder
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Letter body paragraph 1]"))
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Letter body paragraph 2]"))
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Letter body paragraph 3]"))
);
docx = docx.add_paragraph(Paragraph::new());
// Closing
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("Sincerely,"))
);
docx = docx.add_paragraph(Paragraph::new());
docx = docx.add_paragraph(Paragraph::new());
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Your Name]"))
);
Ok(docx)
}
/// Fills `docx` with a resume skeleton: centred name and contact line, a
/// small spacer, then the summary, experience, education and skills
/// headings.
fn apply_resume_template(&self, mut docx: Docx) -> Result<Docx> {
    // Bold size-24 heading using the "Heading2" style.
    let section_heading = |label: &str| {
        Paragraph::new()
            .add_run(Run::new().add_text(label).size(24).bold())
            .style("Heading2")
    };

    let layout = vec![
        // Name header
        Paragraph::new()
            .add_run(Run::new().add_text("[YOUR NAME]").size(32).bold())
            .align(AlignmentType::Center),
        // Contact info
        Paragraph::new()
            .add_run(Run::new().add_text("[Email] | [Phone] | [LinkedIn] | [Location]").size(22))
            .align(AlignmentType::Center),
        // Spacer
        Paragraph::new().add_run(Run::new().add_text("").size(12)),
        // Professional summary
        section_heading("PROFESSIONAL SUMMARY"),
        Paragraph::new()
            .add_run(Run::new().add_text("[2-3 lines summarizing your experience and key skills]")),
        // Remaining section headings
        section_heading("EXPERIENCE"),
        section_heading("EDUCATION"),
        section_heading("SKILLS"),
    ];

    for paragraph in layout {
        docx = docx.add_paragraph(paragraph);
    }
    Ok(docx)
}
fn apply_report_template(&self, mut docx: Docx) -> Result<Docx> {
// Title page
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text(""))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_break(BreakType::TextWrapping))
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[REPORT TITLE]").size(36).bold())
.align(AlignmentType::Center)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Subtitle or Description]").size(24))
.align(AlignmentType::Center)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_break(BreakType::TextWrapping))
.add_run(Run::new().add_break(BreakType::TextWrapping))
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("Prepared by:").size(20))
.align(AlignmentType::Center)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Author Name]").size(20))
.align(AlignmentType::Center)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Date]").size(20))
.align(AlignmentType::Center)
);
// Page break
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_break(BreakType::Page))
);
// Table of Contents placeholder
docx = self.add_table_of_contents(docx)?;
// Executive Summary
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("Executive Summary").size(28).bold())
.style("Heading1")
);
Ok(docx)
}
fn apply_invoice_template(&self, mut docx: Docx) -> Result<Docx> {
// Company header
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[COMPANY NAME]").size(32).bold())
.align(AlignmentType::Right)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("INVOICE").size(28).bold())
.align(AlignmentType::Right)
);
// Invoice details table
let invoice_info = Table::new(vec![
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Invoice #:"))),
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[INV-0001]"))),
])
.add_row(TableRow::new(vec![
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Date:"))),
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[Date]"))),
]))
.add_row(TableRow::new(vec![
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Due Date:"))),
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[Due Date]"))),
]));
docx = docx.add_table(invoice_info);
Ok(docx)
}
/// Fills `docx` with a contract skeleton: a centred title, a blank line
/// and the opening clause naming the two parties.
fn apply_contract_template(&self, mut docx: Docx) -> Result<Docx> {
    let plain = |text: &str| Paragraph::new().add_run(Run::new().add_text(text));

    let layout = vec![
        // Contract title
        Paragraph::new()
            .add_run(Run::new().add_text("[CONTRACT TYPE] AGREEMENT").size(28).bold())
            .align(AlignmentType::Center),
        Paragraph::new(),
        // Parties
        plain("This Agreement is entered into as of [Date] between:"),
        plain("[Party 1 Name], a [Entity Type] (\"Party 1\")"),
        plain("and").align(AlignmentType::Center),
        plain("[Party 2 Name], a [Entity Type] (\"Party 2\")"),
    ];

    for paragraph in layout {
        docx = docx.add_paragraph(paragraph);
    }
    Ok(docx)
}
/// Fills `docx` with a memo skeleton: a centred "MEMORANDUM" title, the
/// TO / FROM / DATE / SUBJECT lines and a 70-underscore separator.
///
/// The separator repeats the string before it is handed to the run. The
/// earlier version called `.repeat(70)` on the `Run` itself.
fn apply_memo_template(&self, mut docx: Docx) -> Result<Docx> {
    // Memo header
    docx = docx.add_paragraph(
        Paragraph::new()
            .add_run(Run::new().add_text("MEMORANDUM").size(24).bold())
            .align(AlignmentType::Center)
    );
    docx = docx.add_paragraph(Paragraph::new());

    // Memo fields: bold label followed by a placeholder value.
    let fields = [
        ("TO: ", "[Recipient(s)]"),
        ("FROM: ", "[Sender]"),
        ("DATE: ", "[Date]"),
        ("SUBJECT: ", "[Subject]"),
    ];
    for (label, placeholder) in fields.iter() {
        docx = docx.add_paragraph(
            Paragraph::new()
                .add_run(Run::new().add_text(*label).bold())
                .add_run(Run::new().add_text(*placeholder))
        );
    }

    // Separator line
    let rule = "_".repeat(70);
    docx = docx.add_paragraph(
        Paragraph::new()
            .add_run(Run::new().add_text(&rule))
    );
    Ok(docx)
}
fn apply_newsletter_template(&self, mut docx: Docx) -> Result<Docx> {
// Newsletter header
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[NEWSLETTER TITLE]").size(36).bold())
.align(AlignmentType::Center)
);
docx = docx.add_paragraph(
Paragraph::new()
.add_run(Run::new().add_text("[Issue #] | [Date]").size(18))
.align(AlignmentType::Center)
);
// Two-column layout simulation
let columns = SectionProperty::new().columns(2);
docx = docx.add_section(columns);
Ok(docx)
}
}
// Supporting types
/// Built-in layouts that `AdvancedDocxHandler::create_from_template` can
/// generate; each variant maps to one private `apply_*_template` builder.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum DocumentTemplate {
BusinessLetter,
Resume,
Report,
Invoice,
Contract,
Memo,
Newsletter,
}
/// Document metadata consumed by `AdvancedDocxHandler::set_document_properties`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentProperties {
pub title: String,
pub subject: String,
/// Written to the document as its creator.
pub author: String,
/// Joined with ", " into a single keywords string.
pub keywords: Vec<String>,
pub description: String,
/// Optional; skipped when `None`.
pub company: Option<String>,
/// Optional; skipped when `None`.
pub manager: Option<String>,
}
/// Page setup for a section added through `AdvancedDocxHandler::add_section`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SectionConfig {
pub page_size: PageSize,
/// When true, page width and height are swapped.
pub landscape: bool,
/// Page margins in millimetres.
pub margins: Margins,
/// Column count; only applied when greater than 1.
pub columns: u32,
}
/// Supported paper sizes. `add_section` maps each variant to portrait
/// width x height in twips: A4 11906x16838, Letter 12240x15840,
/// Legal 12240x20160, A3 16838x23811.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PageSize {
A4,
Letter,
Legal,
A3,
}
/// Page margins in millimetres; `add_section` converts them to twips at
/// 56.7 twips per millimetre.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Margins {
pub top: f32,
pub bottom: f32,
pub left: f32,
pub right: f32,
/// Passed to the page margin's `header` setting.
pub header: f32,
/// Passed to the page margin's `footer` setting.
pub footer: f32,
}
impl Default for Margins {
    /// One-inch (25.4 mm) page margins with half-inch (12.7 mm) header and
    /// footer margins.
    fn default() -> Self {
        let one_inch_mm = 25.4;
        let half_inch_mm = 12.7;
        Self {
            top: one_inch_mm,
            bottom: one_inch_mm,
            left: one_inch_mm,
            right: one_inch_mm,
            header: half_inch_mm,
            footer: half_inch_mm,
        }
    }
}
/// Kind of chart requested from `add_chart`. In the visible code the
/// variant only contributes its `Debug` name to the chart title; the data
/// is always rendered as a table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ChartType {
Bar,
Column,
Line,
Pie,
Area,
Scatter,
}
/// Input for `add_chart`: a title, the category labels (one table row
/// each) and the data series (one table column each).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChartData {
pub title: String,
pub categories: Vec<String>,
pub series: Vec<ChartSeries>,
}
/// One named data series of a chart.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChartSeries {
/// Used as the column header.
pub name: String,
/// Looked up by category index, so position `i` belongs to category `i`.
pub values: Vec<f64>,
}
/// Visual preset for `add_watermark`: `Diagonal` is size 144, colour
/// `C0C0C0`, bold; `Horizontal` is size 100, colour `E0E0E0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WatermarkStyle {
Diagonal,
Horizontal,
}
/// Description of a paragraph style registered through `add_custom_style`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomStyle {
/// Style identifier passed to `Style::new`.
pub id: String,
/// Style name passed to the style's `name` setting.
pub name: String,
/// Parent style; `add_custom_style` falls back to "Normal" when `None`.
pub based_on: Option<String>,
/// Font name applied to both the ASCII and East Asian font slots.
pub font: Option<String>,
/// Passed straight to the run `size` setting.
/// NOTE(review): presumably half-points, as elsewhere in DOCX. Confirm.
pub size: Option<usize>,
pub bold: bool,
pub italic: bool,
/// Passed straight to the run `color` setting.
/// NOTE(review): other calls in this file use hex strings such as "808080". Confirm.
pub color: Option<String>,
pub spacing: Option<StyleSpacing>,
pub indent: Option<StyleIndent>,
}
/// Paragraph spacing for a `CustomStyle`. `add_custom_style` forwards
/// `before` and `after` to `LineSpacing::new`; it does not read `line`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleSpacing {
pub before: i32,
pub after: i32,
pub line: f32,
}
/// Paragraph indentation for a `CustomStyle`; all three values are
/// forwarded to the paragraph property's `indent` setting.
/// NOTE(review): units are not visible here; presumably twips. Confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StyleIndent {
pub left: i32,
pub right: i32,
pub first_line: i32,
}
+435
View File
@@ -0,0 +1,435 @@
use anyhow::{Context, Result};
use image::{DynamicImage, ImageFormat, Rgba, RgbaImage};
use printpdf::*;
use std::fs::{self, File};
use std::io::{BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use tempfile::NamedTempFile;
use tracing::{debug, info, warn};
use crate::pure_converter::PureRustConverter;
/// Converts between DOCX, PDF and image formats, either with the built-in
/// `PureRustConverter` or by shelling out to external command-line tools.
pub struct DocumentConverter {
/// Built-in converter, used by default for DOCX to PDF.
pure_converter: PureRustConverter,
/// When true, `docx_to_pdf` tries LibreOffice and unoconv before the
/// built-in converter.
prefer_external_tools: bool,
}
impl DocumentConverter {
/// Creates a converter that uses the pure Rust implementation by default
/// (external tools are not preferred).
pub fn new() -> Self {
    let pure_converter = PureRustConverter::new();
    Self {
        pure_converter,
        prefer_external_tools: false,
    }
}
/// Converts the DOCX at `docx_path` into a PDF at `pdf_path`.
///
/// When external tools are preferred, LibreOffice and then unoconv are
/// tried first; a failure of either is ignored and the next option is
/// used. The pure Rust converter is the final (and default) path.
///
/// # Errors
/// Returns the pure Rust converter's error if that path fails.
pub fn docx_to_pdf(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
    if self.prefer_external_tools {
        // Method 1: LibreOffice
        match self.try_libreoffice_conversion(docx_path, pdf_path) {
            Ok(()) => {
                info!("Successfully converted DOCX to PDF using LibreOffice");
                return Ok(());
            }
            Err(_) => {}
        }
        // Method 2: unoconv
        match self.try_unoconv_conversion(docx_path, pdf_path) {
            Ok(()) => {
                info!("Successfully converted DOCX to PDF using unoconv");
                return Ok(());
            }
            Err(_) => {}
        }
    }

    // Default: pure Rust implementation
    self.pure_converter.docx_to_pdf_pure(docx_path, pdf_path)?;
    info!("Successfully converted DOCX to PDF using pure Rust implementation");
    Ok(())
}
fn try_libreoffice_conversion(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
let output = Command::new("libreoffice")
.args(&[
"--headless",
"--invisible",
"--nodefault",
"--nolockcheck",
"--nologo",
"--norestore",
"--convert-to",
"pdf",
"--outdir",
pdf_path.parent().unwrap().to_str().unwrap(),
docx_path.to_str().unwrap(),
])
.output();
match output {
Ok(output) if output.status.success() => {
// LibreOffice creates the PDF with the same base name
let temp_pdf = pdf_path.parent().unwrap()
.join(docx_path.file_stem().unwrap())
.with_extension("pdf");
if temp_pdf != pdf_path {
fs::rename(&temp_pdf, pdf_path)?;
}
Ok(())
}
Ok(output) => {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("LibreOffice conversion failed: {}", stderr)
}
Err(e) => {
debug!("LibreOffice not available: {}", e);
anyhow::bail!("LibreOffice not available")
}
}
}
/// Converts `docx_path` to a PDF at `pdf_path` by running `unoconv`.
///
/// Paths are handed to the process as OS strings, so a non-UTF-8 path no
/// longer panics.
///
/// # Errors
/// Fails if unoconv cannot be started or exits unsuccessfully.
fn try_unoconv_conversion(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
    let output = Command::new("unoconv")
        .args(&["-f", "pdf", "-o"])
        .arg(pdf_path)
        .arg(docx_path)
        .output();

    match output {
        Ok(output) if output.status.success() => Ok(()),
        Ok(output) => {
            let stderr = String::from_utf8_lossy(&output.stderr);
            anyhow::bail!("unoconv conversion failed: {}", stderr)
        }
        Err(e) => {
            debug!("unoconv not available: {}", e);
            anyhow::bail!("unoconv not available")
        }
    }
}
/// Extracts the plain text of a DOCX and writes it line by line into a
/// simple A4 (210 x 297 mm) PDF using the built-in Helvetica font.
///
/// Text starts at y = 280 mm and moves down 5 mm per line. When the cursor
/// drops below 20 mm a new page is added and writing continues on it. The
/// earlier version shadowed the new layer inside the `if`, so later lines
/// were drawn over the first page.
///
/// # Errors
/// Fails if text extraction, font loading, file creation or saving fails.
fn basic_docx_to_pdf(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
    // Extract text from DOCX
    let text = dotext::extract_text(docx_path)
        .with_context(|| format!("Failed to extract text from {:?}", docx_path))?;

    // Create a basic PDF with the extracted text
    let (doc, page1, layer1) = PdfDocument::new("Document", Mm(210.0), Mm(297.0), "Layer 1");
    let mut current_layer = doc.get_page(page1).get_layer(layer1);

    // Load a basic font
    let font = doc.add_builtin_font(BuiltinFont::Helvetica)?;

    let mut y_position = Mm(280.0);
    let line_height = Mm(5.0);
    for line in text.text.lines() {
        if y_position < Mm(20.0) {
            // Switch to a new page and keep writing there.
            let (page, layer) = doc.add_page(Mm(210.0), Mm(297.0), "Page layer");
            current_layer = doc.get_page(page).get_layer(layer);
            y_position = Mm(280.0);
        }
        current_layer.use_text(line, 12.0, Mm(10.0), y_position, &font);
        y_position -= line_height;
    }

    doc.save(&mut BufWriter::new(File::create(pdf_path)?))?;
    Ok(())
}
/// Renders each page of `pdf_path` to an image in `output_dir`.
///
/// Tries pdftoppm, then ImageMagick, then Ghostscript, returning the image
/// paths from the first tool that succeeds. Individual tool failures are
/// not reported.
///
/// # Errors
/// Fails when none of the three tools succeeds.
pub fn pdf_to_images(
    &self,
    pdf_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
    dpi: u32,
) -> Result<Vec<PathBuf>> {
    // Method 1: pdftoppm
    match self.try_pdftoppm_conversion(pdf_path, output_dir, format, dpi) {
        Ok(images) => {
            info!("Successfully converted PDF to images using pdftoppm");
            return Ok(images);
        }
        Err(_) => {}
    }
    // Method 2: ImageMagick
    match self.try_imagemagick_conversion(pdf_path, output_dir, format, dpi) {
        Ok(images) => {
            info!("Successfully converted PDF to images using ImageMagick");
            return Ok(images);
        }
        Err(_) => {}
    }
    // Method 3: Ghostscript
    match self.try_ghostscript_conversion(pdf_path, output_dir, format, dpi) {
        Ok(images) => {
            info!("Successfully converted PDF to images using Ghostscript");
            return Ok(images);
        }
        Err(_) => {}
    }
    anyhow::bail!("No PDF to image converter available. Please install pdftoppm, ImageMagick, or Ghostscript")
}
/// Renders the PDF to `page-*.png` / `page-*.jpg` files in `output_dir`
/// with `pdftoppm`, returning the generated paths in sorted order.
///
/// JPEG is used when `format` is JPEG; every other format falls back to
/// PNG. Paths are passed as OS strings, so non-UTF-8 paths do not panic.
/// Only files whose name starts with `page-` and has the expected
/// extension are returned, so unrelated images already in the directory
/// are not mistaken for pages.
///
/// # Errors
/// Fails if the directory cannot be created or read, or pdftoppm cannot be
/// started or exits unsuccessfully.
fn try_pdftoppm_conversion(
    &self,
    pdf_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
    dpi: u32,
) -> Result<Vec<PathBuf>> {
    fs::create_dir_all(output_dir)?;
    let output_prefix = output_dir.join("page");
    let (format_arg, extension) = match format {
        ImageFormat::Jpeg => ("-jpeg", "jpg"),
        _ => ("-png", "png"),
    };

    let output = Command::new("pdftoppm")
        .arg(format_arg)
        .arg("-r")
        .arg(dpi.to_string())
        .arg(pdf_path)
        .arg(&output_prefix)
        .output()?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("pdftoppm failed: {}", stderr);
    }

    // Collect generated image files
    let mut images = Vec::new();
    for entry in fs::read_dir(output_dir)? {
        let path = entry?.path();
        let is_page = path
            .file_name()
            .and_then(|name| name.to_str())
            .map_or(false, |name| name.starts_with("page-"));
        if is_page && path.extension() == Some(std::ffi::OsStr::new(extension)) {
            images.push(path);
        }
    }
    images.sort();
    Ok(images)
}
/// Renders the PDF to `page-NNN.png` / `page-NNN.jpg` files in
/// `output_dir` with ImageMagick's `convert`, returning the generated
/// paths in sorted order.
///
/// JPEG is used when `format` is JPEG; every other format falls back to
/// PNG. Paths are passed as OS strings, so non-UTF-8 paths do not panic.
/// Only files whose name starts with `page-` and has the expected
/// extension are returned, so unrelated images already in the directory
/// are not mistaken for pages.
///
/// # Errors
/// Fails if the directory cannot be created or read, or `convert` cannot
/// be started or exits unsuccessfully.
fn try_imagemagick_conversion(
    &self,
    pdf_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
    dpi: u32,
) -> Result<Vec<PathBuf>> {
    fs::create_dir_all(output_dir)?;
    let extension = match format {
        ImageFormat::Jpeg => "jpg",
        _ => "png",
    };
    let output_pattern = output_dir.join(format!("page-%03d.{}", extension));

    let output = Command::new("convert")
        .arg("-density")
        .arg(dpi.to_string())
        .arg(pdf_path)
        .args(&["-quality", "100"])
        .arg(&output_pattern)
        .output()?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("ImageMagick convert failed: {}", stderr);
    }

    // Collect generated image files
    let mut images = Vec::new();
    for entry in fs::read_dir(output_dir)? {
        let path = entry?.path();
        let is_page = path
            .file_name()
            .and_then(|name| name.to_str())
            .map_or(false, |name| name.starts_with("page-"));
        if is_page && path.extension() == Some(std::ffi::OsStr::new(extension)) {
            images.push(path);
        }
    }
    images.sort();
    Ok(images)
}
/// Renders the PDF to `page-NNN.png` / `page-NNN.jpg` files in
/// `output_dir` with Ghostscript (`gs`), returning the generated paths in
/// sorted order.
///
/// JPEG (`jpeg` device) is used when `format` is JPEG; every other format
/// falls back to PNG (`png16m` device). The device is passed as a single
/// `-sDEVICE=<name>` switch. Previously the name was a separate argument,
/// which Ghostscript does not read as the device. Paths are passed as OS
/// strings, so non-UTF-8 paths do not panic, and only `page-*` files with
/// the expected extension are returned.
///
/// # Errors
/// Fails if the directory cannot be created or read, or `gs` cannot be
/// started or exits unsuccessfully.
fn try_ghostscript_conversion(
    &self,
    pdf_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
    dpi: u32,
) -> Result<Vec<PathBuf>> {
    fs::create_dir_all(output_dir)?;
    let (device, extension) = match format {
        ImageFormat::Jpeg => ("jpeg", "jpg"),
        _ => ("png16m", "png"),
    };
    let output_pattern = output_dir.join(format!("page-%03d.{}", extension));

    // Build "-sOutputFile=<pattern>" without forcing the path through UTF-8.
    let mut output_arg = std::ffi::OsString::from("-sOutputFile=");
    output_arg.push(output_pattern.as_os_str());

    let output = Command::new("gs")
        .arg("-dNOPAUSE")
        .arg("-dBATCH")
        .arg(format!("-sDEVICE={}", device))
        .arg(format!("-r{}", dpi))
        .arg("-dTextAlphaBits=4")
        .arg("-dGraphicsAlphaBits=4")
        .arg(&output_arg)
        .arg(pdf_path)
        .output()?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("Ghostscript failed: {}", stderr);
    }

    // Collect generated image files
    let mut images = Vec::new();
    for entry in fs::read_dir(output_dir)? {
        let path = entry?.path();
        let is_page = path
            .file_name()
            .and_then(|name| name.to_str())
            .map_or(false, |name| name.starts_with("page-"));
        if is_page && path.extension() == Some(std::ffi::OsStr::new(extension)) {
            images.push(path);
        }
    }
    images.sort();
    Ok(images)
}
/// Renders a DOCX to page images by converting it to a temporary PDF and
/// then rendering that PDF with `pdf_to_images`.
///
/// # Errors
/// Fails if the temporary file cannot be created or either conversion
/// step fails.
pub fn docx_to_images(
    &self,
    docx_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
    dpi: u32,
) -> Result<Vec<PathBuf>> {
    // Step 1: DOCX to a scratch PDF
    let scratch = NamedTempFile::new()?;
    let scratch_pdf = scratch.into_temp_path();
    self.docx_to_pdf(docx_path, &scratch_pdf)?;

    // Step 2: PDF to images
    self.pdf_to_images(&scratch_pdf, output_dir, format, dpi)
}
/// Opens the image at `image_path`, produces a thumbnail via
/// `thumbnail(width, height)` and saves it to `output_path`.
///
/// # Errors
/// Fails if the image cannot be opened or the thumbnail cannot be saved.
pub fn create_thumbnail(
    &self,
    image_path: &Path,
    output_path: &Path,
    width: u32,
    height: u32,
) -> Result<()> {
    let source = image::open(image_path)
        .with_context(|| format!("Failed to open image {:?}", image_path))?;

    source
        .thumbnail(width, height)
        .save(output_path)
        .with_context(|| format!("Failed to save thumbnail to {:?}", output_path))?;

    info!("Created thumbnail {}x{} at {:?}", width, height, output_path);
    Ok(())
}
/// Merges `pdf_paths` into one PDF at `output_path`.
///
/// pdftk is tried first; if it fails for any reason, the lopdf-based merge
/// is used instead.
///
/// # Errors
/// Returns the lopdf merge error when both approaches fail.
pub fn merge_pdfs(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
    match self.try_pdftk_merge(pdf_paths, output_path) {
        Ok(()) => {
            info!("Successfully merged PDFs using pdftk");
        }
        Err(_) => {
            // Fallback to lopdf for merging
            self.merge_pdfs_with_lopdf(pdf_paths, output_path)?;
            info!("Successfully merged PDFs using lopdf");
        }
    }
    Ok(())
}
/// Merges `pdf_paths` into `output_path` by running
/// `pdftk <inputs...> cat output <output_path>`.
///
/// Paths are handed to the process as OS strings, so a non-UTF-8 path no
/// longer panics.
///
/// # Errors
/// Fails if pdftk cannot be started or exits unsuccessfully.
fn try_pdftk_merge(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
    let output = Command::new("pdftk")
        .args(pdf_paths)
        .arg("cat")
        .arg("output")
        .arg(output_path)
        .output()?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("pdftk merge failed: {}", stderr);
    }
    Ok(())
}
/// Loads every input PDF with lopdf, copies each page object into a new
/// version-1.5 document and saves it to `output_path`.
///
/// NOTE(review): only the page objects themselves are added; no catalog or
/// page tree is built here and referenced objects are not copied. Confirm
/// the saved file opens correctly in a PDF viewer.
///
/// # Errors
/// Fails if an input cannot be loaded, a page object cannot be fetched or
/// the result cannot be saved.
fn merge_pdfs_with_lopdf(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
    use lopdf::{Document, Object, ObjectId};

    let mut merged = Document::new();
    merged.version = "1.5".to_string();

    for source in pdf_paths {
        let loaded = Document::load(source)?;
        // Copy each page object across.
        for (_, page_id) in loaded.get_pages() {
            let page = loaded.get_object(page_id)?.clone();
            merged.add_object(page);
        }
    }

    merged.save(output_path)?;
    Ok(())
}
/// Writes each page of `pdf_path` to its own `page_NNN.pdf` file (numbered
/// from 001) in `output_dir` and returns the created paths in page order.
///
/// NOTE(review): each output document receives only the page object; no
/// catalog or page tree is built here and referenced objects are not
/// copied. Confirm the files open correctly in a PDF viewer.
///
/// # Errors
/// Fails if the directory cannot be created, the PDF cannot be loaded, a
/// page object cannot be fetched or an output file cannot be saved.
pub fn split_pdf(&self, pdf_path: &Path, output_dir: &Path) -> Result<Vec<PathBuf>> {
    use lopdf::Document;

    fs::create_dir_all(output_dir)?;
    let source = Document::load(pdf_path)?;
    let page_ids = source.get_pages();

    let mut written = Vec::new();
    for (index, page_id) in page_ids.values().enumerate() {
        let mut page_doc = Document::new();
        page_doc.version = source.version.clone();
        // Clone the page into the new document.
        page_doc.add_object(source.get_object(*page_id)?.clone());

        let target = output_dir.join(format!("page_{:03}.pdf", index + 1));
        page_doc.save(&target)?;
        written.push(target);
    }

    info!("Split PDF into {} pages", written.len());
    Ok(written)
}
}
+408
View File
@@ -0,0 +1,408 @@
use anyhow::{Context, Result};
use docx_rs::*;
use std::fs::{self, File};
use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use tempfile::NamedTempFile;
use uuid::Uuid;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
/// Bookkeeping record the handler keeps for every document it has created or opened.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocxMetadata {
/// Identifier assigned by the handler (a UUID v4 rendered as a string).
pub id: String,
/// Location of the handler's working copy (`<temp_dir>/<id>.docx`).
pub path: PathBuf,
/// When the handler registered the document (not the file's own creation time).
pub created_at: DateTime<Utc>,
/// Last-modified timestamp as tracked by the handler.
pub modified_at: DateTime<Utc>,
/// Size in bytes as recorded by the handler.
pub size_bytes: u64,
/// Page count, if known.
pub page_count: Option<usize>,
/// Word count, if known.
pub word_count: Option<usize>,
/// Document author, if known.
pub author: Option<String>,
/// Document title, if known.
pub title: Option<String>,
/// Document subject, if known.
pub subject: Option<String>,
}
/// Optional formatting for a paragraph; every field left as `None` means "use the default".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocxStyle {
/// Font family name.
pub font_family: Option<String>,
/// Font size, passed straight to `Run::size` (presumably half-points — TODO confirm against docx_rs).
pub font_size: Option<usize>,
/// Bold is applied only when this is `Some(true)`.
pub bold: Option<bool>,
/// Italic is applied only when this is `Some(true)`.
pub italic: Option<bool>,
/// A single underline is applied only when this is `Some(true)`.
pub underline: Option<bool>,
/// Text colour, passed straight to `Run::color` (presumably a hex RGB string such as "808080" — TODO confirm).
pub color: Option<String>,
/// Paragraph alignment; `DocxHandler::add_paragraph` recognises "left", "center", "right" and "justify".
pub alignment: Option<String>,
/// Line spacing.
pub line_spacing: Option<f32>,
}
/// Plain-text contents of a table to insert into a document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableData {
/// Body rows; each inner vector holds the cell texts of one row.
pub rows: Vec<Vec<String>>,
/// Optional header cell texts.
pub headers: Option<Vec<String>>,
/// Optional border style name.
pub border_style: Option<String>,
}
/// An image payload together with optional display hints.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageData {
/// Raw image bytes.
pub data: Vec<u8>,
/// Requested width, if any (unit is not established in this file — TODO confirm).
pub width: Option<u32>,
/// Requested height, if any (unit is not established in this file — TODO confirm).
pub height: Option<u32>,
/// Alternative text for the image.
pub alt_text: Option<String>,
}
/// Manages working copies of DOCX files on disk and the metadata registered for each.
pub struct DocxHandler {
// Directory that holds the working copies (`<id>.docx`).
temp_dir: PathBuf,
/// Registered documents, keyed by document id.
pub documents: std::collections::HashMap<String, DocxMetadata>,
}
impl DocxHandler {
pub fn new() -> Result<Self> {
let temp_dir = std::env::temp_dir().join("docx-mcp");
fs::create_dir_all(&temp_dir)?;
Ok(Self {
temp_dir,
documents: std::collections::HashMap::new(),
})
}
#[cfg(test)]
pub fn new_with_temp_dir(temp_dir: &Path) -> Result<Self> {
let temp_dir = temp_dir.to_path_buf();
fs::create_dir_all(&temp_dir)?;
Ok(Self {
temp_dir,
documents: std::collections::HashMap::new(),
})
}
pub fn create_document(&mut self) -> Result<String> {
let doc_id = Uuid::new_v4().to_string();
let doc_path = self.temp_dir.join(format!("{}.docx", doc_id));
let docx = Docx::new();
let file = File::create(&doc_path)?;
docx.build().pack(file)?;
let metadata = DocxMetadata {
id: doc_id.clone(),
path: doc_path,
created_at: Utc::now(),
modified_at: Utc::now(),
size_bytes: 0,
page_count: Some(1),
word_count: Some(0),
author: None,
title: None,
subject: None,
};
self.documents.insert(doc_id.clone(), metadata);
info!("Created new document with ID: {}", doc_id);
Ok(doc_id)
}
pub fn open_document(&mut self, path: &Path) -> Result<String> {
let doc_id = Uuid::new_v4().to_string();
let doc_path = self.temp_dir.join(format!("{}.docx", doc_id));
fs::copy(path, &doc_path)
.with_context(|| format!("Failed to copy document from {:?}", path))?;
let file_metadata = fs::metadata(&doc_path)?;
let metadata = DocxMetadata {
id: doc_id.clone(),
path: doc_path,
created_at: Utc::now(),
modified_at: Utc::now(),
size_bytes: file_metadata.len(),
page_count: None,
word_count: None,
author: None,
title: None,
subject: None,
};
self.documents.insert(doc_id.clone(), metadata);
info!("Opened document from {:?} with ID: {}", path, doc_id);
Ok(doc_id)
}
pub fn add_paragraph(&mut self, doc_id: &str, text: &str, style: Option<DocxStyle>) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let mut paragraph = Paragraph::new().add_run(Run::new().add_text(text));
if let Some(style) = style {
let mut run = Run::new().add_text(text);
if let Some(size) = style.font_size {
run = run.size(size);
}
if style.bold == Some(true) {
run = run.bold();
}
if style.italic == Some(true) {
run = run.italic();
}
if style.underline == Some(true) {
run = run.underline("single");
}
if let Some(color) = style.color {
run = run.color(color);
}
paragraph = Paragraph::new().add_run(run);
if let Some(alignment) = style.alignment {
paragraph = match alignment.as_str() {
"left" => paragraph.align(AlignmentType::Left),
"center" => paragraph.align(AlignmentType::Center),
"right" => paragraph.align(AlignmentType::Right),
"justify" => paragraph.align(AlignmentType::Justified),
_ => paragraph,
};
}
}
docx = docx.add_paragraph(paragraph);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Added paragraph to document {}", doc_id);
Ok(())
}
pub fn add_heading(&mut self, doc_id: &str, text: &str, level: usize) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let heading_style = match level {
1 => "Heading1",
2 => "Heading2",
3 => "Heading3",
4 => "Heading4",
5 => "Heading5",
6 => "Heading6",
_ => "Heading1",
};
let paragraph = Paragraph::new()
.add_run(Run::new().add_text(text))
.style(heading_style);
docx = docx.add_paragraph(paragraph);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Added heading level {} to document {}", level, doc_id);
Ok(())
}
pub fn add_table(&mut self, doc_id: &str, table_data: TableData) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let col_count = table_data.rows.get(0).map(|r| r.len()).unwrap_or(0);
let mut table = Table::new(vec![TableCell::new(); col_count]);
for row_data in table_data.rows {
let mut cells = Vec::new();
for cell_text in row_data {
let cell = TableCell::new()
.add_paragraph(Paragraph::new().add_run(Run::new().add_text(cell_text)));
cells.push(cell);
}
while cells.len() < col_count {
cells.push(TableCell::new());
}
table = table.add_row(TableRow::new(cells));
}
docx = docx.add_table(table);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Added table to document {}", doc_id);
Ok(())
}
pub fn add_list(&mut self, doc_id: &str, items: Vec<String>, ordered: bool) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let numbering_id = if ordered { 1 } else { 2 };
for item in items {
let paragraph = Paragraph::new()
.add_run(Run::new().add_text(item))
.numbering(NumberingId::new(numbering_id), IndentLevel::new(0));
docx = docx.add_paragraph(paragraph);
}
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Added {} list to document {}", if ordered { "ordered" } else { "unordered" }, doc_id);
Ok(())
}
pub fn add_page_break(&mut self, doc_id: &str) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let paragraph = Paragraph::new().add_run(Run::new().add_break(BreakType::Page));
docx = docx.add_paragraph(paragraph);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Added page break to document {}", doc_id);
Ok(())
}
pub fn set_header(&mut self, doc_id: &str, text: &str) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let header = Header::new().add_paragraph(
Paragraph::new().add_run(Run::new().add_text(text))
);
docx = docx.header(header);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Set header for document {}", doc_id);
Ok(())
}
pub fn set_footer(&mut self, doc_id: &str, text: &str) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
let mut file = File::open(&metadata.path)?;
let mut buffer = Vec::new();
file.read_to_end(&mut buffer)?;
let mut docx = Docx::from_reader(&buffer[..])?;
let footer = Footer::new().add_paragraph(
Paragraph::new().add_run(Run::new().add_text(text))
);
docx = docx.footer(footer);
let file = File::create(&metadata.path)?;
docx.build().pack(file)?;
info!("Set footer for document {}", doc_id);
Ok(())
}
pub fn find_and_replace(&mut self, doc_id: &str, find_text: &str, replace_text: &str) -> Result<usize> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
// Note: This is a simplified implementation
// Real implementation would need to parse the DOCX XML structure
// and perform replacements while preserving formatting
warn!("Find and replace operation requires advanced XML manipulation");
Ok(0)
}
pub fn extract_text(&self, doc_id: &str) -> Result<String> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
// Use pure Rust text extraction
use crate::pure_converter::PureRustConverter;
let converter = PureRustConverter::new();
let text = converter.extract_text_from_docx(&metadata.path)
.with_context(|| format!("Failed to extract text from document {}", doc_id))?;
Ok(text)
}
pub fn get_metadata(&self, doc_id: &str) -> Result<DocxMetadata> {
self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))
.map(|m| m.clone())
}
pub fn save_document(&self, doc_id: &str, output_path: &Path) -> Result<()> {
let metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
fs::copy(&metadata.path, output_path)
.with_context(|| format!("Failed to save document to {:?}", output_path))?;
info!("Saved document {} to {:?}", doc_id, output_path);
Ok(())
}
pub fn close_document(&mut self, doc_id: &str) -> Result<()> {
let metadata = self.documents.remove(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
if metadata.path.exists() {
fs::remove_file(&metadata.path)?;
}
info!("Closed document {}", doc_id);
Ok(())
}
pub fn list_documents(&self) -> Vec<DocxMetadata> {
self.documents.values().cloned().collect()
}
}
+1091
View File
File diff suppressed because it is too large Load Diff
+50
View File
@@ -0,0 +1,50 @@
use once_cell::sync::Lazy;
// Font bytes are compiled into the binary only when the `embedded-fonts`
// feature is enabled AND the build has debug assertions off
// (`not(debug_assertions)`). In every other configuration each item below is
// an empty slice, so consumers must treat an empty slice as "no embedded
// font". When embedding is active the referenced .ttf files must exist under
// ../assets/fonts/, because `include_bytes!` resolves them at compile time.
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_REGULAR: &[u8] = include_bytes!("../assets/fonts/LiberationSans-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_REGULAR: &[u8] = &[];
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_BOLD: &[u8] = include_bytes!("../assets/fonts/LiberationSans-Bold.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_BOLD: &[u8] = &[];
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_ITALIC: &[u8] = include_bytes!("../assets/fonts/LiberationSans-Italic.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_ITALIC: &[u8] = &[];
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_MONO_REGULAR: &[u8] = include_bytes!("../assets/fonts/LiberationMono-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_MONO_REGULAR: &[u8] = &[];
// The Noto Sans pair follows the same cfg rule but is declared `const`
// rather than `static`.
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub const EMBEDDED_FONT_REGULAR: &[u8] = include_bytes!("../assets/fonts/NotoSans-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub const EMBEDDED_FONT_REGULAR: &[u8] = &[];
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub const EMBEDDED_FONT_BOLD: &[u8] = include_bytes!("../assets/fonts/NotoSans-Bold.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub const EMBEDDED_FONT_BOLD: &[u8] = &[];
/// Byte slices for the four font faces; a slice may be empty.
pub struct EmbeddedFonts {
/// Regular face.
pub regular: &'static [u8],
/// Bold face.
pub bold: &'static [u8],
/// Italic face.
pub italic: &'static [u8],
/// Monospace face.
pub mono: &'static [u8],
}
/// Lazily initialised font bundle assembled from the Liberation Sans
/// (regular, bold, italic) and Liberation Mono (regular) statics.
pub static FONTS: Lazy<EmbeddedFonts> = Lazy::new(|| {
EmbeddedFonts {
regular: LIBERATION_SANS_REGULAR,
bold: LIBERATION_SANS_BOLD,
italic: LIBERATION_SANS_ITALIC,
mono: LIBERATION_MONO_REGULAR,
}
});
+43
View File
@@ -0,0 +1,43 @@
use anyhow::Result;
use mcp_server::{Server, ServerBuilder, ServerOptions};
use mcp_core::ToolManager;
use tracing::info;
use tracing_subscriber::{EnvFilter, fmt, prelude::*};
mod docx_tools;
mod docx_handler;
mod converter;
mod pure_converter;
mod advanced_docx;
mod security;
#[cfg(feature = "embedded-fonts")]
mod fonts;
use docx_tools::DocxToolsProvider;
/// Entry point: installs tracing, loads the security policy from the
/// environment, then builds and runs the MCP server until it stops.
#[tokio::main]
async fn main() -> Result<()> {
    // Log filtering is taken from the default environment variable read by `EnvFilter`.
    let subscriber = tracing_subscriber::registry()
        .with(fmt::layer())
        .with(EnvFilter::from_default_env());
    subscriber.init();

    // Load security configuration from environment
    let policy = security::SecurityConfig::from_env();
    info!("Starting DOCX MCP Server - Security: {}", policy.get_summary());

    let tools = DocxToolsProvider::new_with_security(policy);
    let server_options = ServerOptions::default()
        .with_name("docx-mcp-server")
        .with_version("0.1.0");

    let server = ServerBuilder::new(server_options)
        .with_tool_provider(tools)
        .build();

    server.run().await?;
    Ok(())
}
+423
View File
@@ -0,0 +1,423 @@
use anyhow::{Context, Result};
use image::{DynamicImage, ImageFormat, Rgba, RgbaImage};
use printpdf::*;
use std::fs::{self, File};
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use tempfile::NamedTempFile;
use tracing::{debug, info, warn};
use roxmltree;
use zip::ZipArchive;
use rusttype::{Font, Scale};
use lopdf;
/// Stateless converter whose methods work on DOCX, PDF, image and Markdown
/// inputs using Rust crates only (no external programs are spawned).
pub struct PureRustConverter;
impl PureRustConverter {
pub fn new() -> Self {
Self
}
/// Extract text from DOCX using pure Rust XML parsing.
///
/// Reads `word/document.xml` from the archive and concatenates the text of
/// every `t` element in document order. A newline is emitted for each `p`
/// and `br` element and a tab for each `tab` element. Run texts are joined
/// without a separator, so a word split across several runs stays intact.
/// Leading and trailing whitespace is trimmed from the result.
///
/// Elements are matched by local name only, regardless of namespace.
///
/// # Errors
/// Fails if the file cannot be opened, is not a readable zip archive, has
/// no (or an empty) `word/document.xml`, or that entry is not well-formed XML.
pub fn extract_text_from_docx(&self, docx_path: &Path) -> Result<String> {
    let mut archive = ZipArchive::new(File::open(docx_path)?)?;

    // Look the entry up directly instead of scanning every archive member.
    let mut document_xml = String::new();
    archive
        .by_name("word/document.xml")
        .context("No document.xml found in DOCX file")?
        .read_to_string(&mut document_xml)?;
    if document_xml.is_empty() {
        anyhow::bail!("No document.xml found in DOCX file");
    }

    let doc = roxmltree::Document::parse(&document_xml)?;
    let mut text = String::new();
    for node in doc.descendants().filter(|node| node.is_element()) {
        match node.tag_name().name() {
            "t" => {
                if let Some(run_text) = node.text() {
                    text.push_str(run_text);
                }
            }
            "tab" => text.push('\t'),
            // `p` is visited before its runs, so this separates paragraphs.
            "br" | "p" => text.push('\n'),
            _ => {}
        }
    }
    Ok(text.trim().to_string())
}
/// Convert DOCX to PDF using pure Rust (no external dependencies).
///
/// The DOCX is reduced to plain text first and that text is laid out as a
/// PDF, so only the text content is carried over.
///
/// # Errors
/// Fails if text extraction or PDF creation fails.
pub fn docx_to_pdf_pure(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
    self.extract_text_from_docx(docx_path)
        .with_context(|| format!("Failed to extract text from {:?}", docx_path))
        .and_then(|body| self.create_pdf_from_text(&body, pdf_path))?;

    info!("Successfully converted DOCX to PDF using pure Rust");
    Ok(())
}
/// Create a PDF from text content.
///
/// Lays `text` out on 210mm x 297mm pages in built-in Helvetica at size
/// 11. Each input line is greedily word-wrapped at about 80 characters
/// (counted as characters, not bytes); a single word longer than the limit
/// is kept whole on its own line. A blank input line advances the cursor
/// by one line height so paragraph spacing survives. A new page is started
/// only when a line is about to be written below the bottom margin.
///
/// # Errors
/// Fails if the font cannot be added or the output file cannot be created
/// or written.
pub fn create_pdf_from_text(&self, text: &str, pdf_path: &Path) -> Result<()> {
    /// Greedily packs the whitespace-separated words of `line` into lines of
    /// at most `max_chars` characters; returns nothing for a blank line.
    fn wrap_words(line: &str, max_chars: usize) -> Vec<String> {
        let mut wrapped = Vec::new();
        let mut current = String::new();
        let mut current_chars = 0usize;
        for word in line.split_whitespace() {
            let word_chars = word.chars().count();
            if !current.is_empty() && current_chars + word_chars + 1 > max_chars {
                wrapped.push(std::mem::take(&mut current));
                current_chars = 0;
            }
            if !current.is_empty() {
                current.push(' ');
                current_chars += 1;
            }
            current.push_str(word);
            current_chars += word_chars;
        }
        if !current.is_empty() {
            wrapped.push(current);
        }
        wrapped
    }

    let (doc, page1, layer1) = PdfDocument::new("Document", Mm(210.0), Mm(297.0), "Layer 1");
    // Use embedded font or built-in font
    let font = doc.add_builtin_font(BuiltinFont::Helvetica)?;

    // Configure text layout
    let font_size = 11.0;
    let line_height = Mm(5.0);
    let margin_left = Mm(20.0);
    let margin_top = Mm(280.0);
    let margin_bottom = Mm(20.0);
    let max_chars_per_line = 80; // Approximate

    let mut current_page = page1;
    let mut current_layer = layer1;
    let mut y_position = margin_top;

    for line in text.lines() {
        let wrapped = wrap_words(line, max_chars_per_line);
        if wrapped.is_empty() {
            // Blank line: keep the vertical gap, write nothing.
            y_position -= line_height;
            continue;
        }
        for physical_line in wrapped {
            // Start a new page only when something is about to be written,
            // so the document never ends on an empty page.
            if y_position < margin_bottom {
                let (new_page, new_layer) = doc.add_page(Mm(210.0), Mm(297.0), "Page layer");
                current_page = new_page;
                current_layer = new_layer;
                y_position = margin_top;
            }
            doc.get_page(current_page)
                .get_layer(current_layer)
                .use_text(physical_line, font_size, margin_left, y_position, &font);
            y_position -= line_height;
        }
    }

    // Save PDF
    doc.save(&mut BufWriter::new(File::create(pdf_path)?))?;
    Ok(())
}
/// Convert PDF to images using pure Rust.
///
/// Writes one image per page of `pdf_path` into `output_dir`, named
/// `page_001.<ext>`, `page_002.<ext>`, ... where `<ext>` is "jpg" for JPEG
/// and "png" for every other format. Returns the written paths in page
/// order. Page images come from `render_pdf_page_to_image`.
///
/// # Errors
/// Fails if the PDF cannot be loaded, the directory cannot be created, or
/// an image cannot be rendered or saved.
pub fn pdf_to_images_pure(
    &self,
    pdf_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
) -> Result<Vec<PathBuf>> {
    let document = lopdf::Document::load(pdf_path)?;
    let page_total = document.get_pages().len();
    fs::create_dir_all(output_dir)?;

    let mut written = Vec::new();
    for index in 0..page_total {
        let rendered = self.render_pdf_page_to_image(&document, index)?;

        let extension = if let ImageFormat::Jpeg = format { "jpg" } else { "png" };
        let target = output_dir.join(format!("page_{:03}.{}", index + 1, extension));
        rendered.save_with_format(&target, format)?;
        written.push(target);
    }
    Ok(written)
}
/// Render a PDF page to image (simplified implementation).
///
/// Placeholder: both arguments are ignored and the result is always an
/// opaque white 1240x1754 RGBA image (A4 at 150 DPI). No PDF content is drawn.
fn render_pdf_page_to_image(&self, _doc: &lopdf::Document, _page_num: usize) -> Result<DynamicImage> {
    const A4_WIDTH_PX: u32 = 1240; // A4 at 150 DPI
    const A4_HEIGHT_PX: u32 = 1754; // A4 at 150 DPI

    let opaque_white = Rgba([255, 255, 255, 255]);
    let blank_page = RgbaImage::from_pixel(A4_WIDTH_PX, A4_HEIGHT_PX, opaque_white);

    // In production, you would properly render PDF content
    Ok(DynamicImage::ImageRgba8(blank_page))
}
/// Convert DOCX to images using pure Rust.
///
/// Goes through an intermediate PDF written to a temporary path, then
/// converts that PDF page by page with `pdf_to_images_pure`.
///
/// # Errors
/// Fails if the temporary file cannot be created or either conversion fails.
pub fn docx_to_images_pure(
    &self,
    docx_path: &Path,
    output_dir: &Path,
    format: ImageFormat,
) -> Result<Vec<PathBuf>> {
    let scratch_pdf = NamedTempFile::new()?.into_temp_path();

    self.docx_to_pdf_pure(docx_path, &scratch_pdf)
        .and_then(|()| self.pdf_to_images_pure(&scratch_pdf, output_dir, format))
}
/// Create a thumbnail from an image.
///
/// Opens `image_path`, scales it with `DynamicImage::thumbnail(width, height)`
/// and saves the result to `output_path`.
///
/// # Errors
/// Fails if the source image cannot be opened or the thumbnail cannot be saved.
pub fn create_thumbnail(
    &self,
    image_path: &Path,
    output_path: &Path,
    width: u32,
    height: u32,
) -> Result<()> {
    let source = image::open(image_path)
        .with_context(|| format!("Failed to open image {:?}", image_path))?;

    source
        .thumbnail(width, height)
        .save(output_path)
        .with_context(|| format!("Failed to save thumbnail to {:?}", output_path))?;

    info!("Created thumbnail {}x{} at {:?}", width, height, output_path);
    Ok(())
}
/// Merge multiple PDFs using pure Rust
pub fn merge_pdfs_pure(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
use lopdf::{Document, Object, ObjectId};
// Create a new document for merging
let mut merged_doc = Document::with_version("1.5");
// Track page tree
let mut all_pages = Vec::new();
for pdf_path in pdf_paths {
let doc = Document::load(pdf_path)?;
// Get pages from the document
let pages = doc.get_pages();
for (_page_num, page_id) in pages.iter() {
// Clone the page object
if let Ok(page_obj) = doc.get_object(*page_id) {
let new_id = merged_doc.new_object_id();
merged_doc.objects.insert(new_id, page_obj.clone());
all_pages.push(new_id);
}
}
}
// Build the page tree for merged document
let pages_id = merged_doc.new_object_id();
let pages_dict = lopdf::dictionary! {
"Type" => "Pages",
"Kids" => all_pages.iter().map(|id| Object::Reference(*id)).collect::<Vec<_>>(),
"Count" => all_pages.len() as i32,
};
merged_doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
// Update catalog
let catalog_id = merged_doc.new_object_id();
let catalog = lopdf::dictionary! {
"Type" => "Catalog",
"Pages" => Object::Reference(pages_id),
};
merged_doc.objects.insert(catalog_id, Object::Dictionary(catalog));
merged_doc.trailer.set("Root", Object::Reference(catalog_id));
// Save the merged PDF
merged_doc.save(output_path)?;
info!("Successfully merged {} PDFs into {:?}", pdf_paths.len(), output_path);
Ok(())
}
/// Split a PDF into individual pages using pure Rust
pub fn split_pdf_pure(&self, pdf_path: &Path, output_dir: &Path) -> Result<Vec<PathBuf>> {
use lopdf::Document;
fs::create_dir_all(output_dir)?;
let doc = Document::load(pdf_path)?;
let pages = doc.get_pages();
let mut output_paths = Vec::new();
for (i, (_page_num, page_id)) in pages.iter().enumerate() {
// Create a new document with just this page
let mut single_page_doc = Document::with_version("1.5");
// Clone the page
if let Ok(page_obj) = doc.get_object(*page_id) {
let new_page_id = single_page_doc.new_object_id();
single_page_doc.objects.insert(new_page_id, page_obj.clone());
// Create page tree
let pages_id = single_page_doc.new_object_id();
let pages_dict = lopdf::dictionary! {
"Type" => "Pages",
"Kids" => vec![Object::Reference(new_page_id)],
"Count" => 1,
};
single_page_doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
// Create catalog
let catalog_id = single_page_doc.new_object_id();
let catalog = lopdf::dictionary! {
"Type" => "Catalog",
"Pages" => Object::Reference(pages_id),
};
single_page_doc.objects.insert(catalog_id, Object::Dictionary(catalog));
single_page_doc.trailer.set("Root", Object::Reference(catalog_id));
// Save the page
let output_path = output_dir.join(format!("page_{:03}.pdf", i + 1));
single_page_doc.save(&output_path)?;
output_paths.push(output_path);
}
}
info!("Split PDF into {} pages", output_paths.len());
Ok(output_paths)
}
/// Parse and render markdown to PDF.
///
/// The markdown is flattened to plain text and passed to
/// `create_pdf_from_text`. Headings keep their `#` markers, emphasis and
/// strong text keep `*` / `**`, and inline code keeps its backticks. Each
/// list item starts with a marker: `N. ` for ordered lists (counting from
/// the list's start number) and `- ` for bullet lists. The marker and the
/// nesting indent are emitted once per item, not before every text fragment.
///
/// # Errors
/// Fails if the PDF cannot be created.
pub fn markdown_to_pdf(&self, markdown: &str, pdf_path: &Path) -> Result<()> {
    use pulldown_cmark::{Event, Parser, Tag, TagEnd};

    let mut plain_text = String::new();
    let mut in_code_block = false;
    // One entry per open list: `Some(next_number)` for ordered, `None` for bullets.
    let mut open_lists: Vec<Option<u64>> = Vec::new();
    // True right after an item marker, so the item's first paragraph stays on the marker's line.
    let mut at_item_start = false;

    for event in Parser::new(markdown) {
        match event {
            Event::Text(text) => {
                if in_code_block {
                    plain_text.push_str(" ");
                }
                plain_text.push_str(&text);
                at_item_start = false;
            }
            Event::Start(tag) => match tag {
                Tag::Heading { level, .. } => {
                    plain_text.push('\n');
                    plain_text.push_str(&"#".repeat(level as usize));
                    plain_text.push(' ');
                }
                Tag::Paragraph => {
                    if !plain_text.is_empty() && !at_item_start {
                        plain_text.push_str("\n\n");
                    }
                }
                Tag::List(first_number) => {
                    open_lists.push(first_number);
                    plain_text.push('\n');
                }
                Tag::Item => {
                    plain_text.push_str(&" ".repeat(open_lists.len()));
                    match open_lists.last_mut() {
                        Some(Some(number)) => {
                            plain_text.push_str(&format!("{}. ", number));
                            *number += 1;
                        }
                        _ => plain_text.push_str("- "),
                    }
                    at_item_start = true;
                }
                Tag::CodeBlock(_) => {
                    in_code_block = true;
                    plain_text.push_str("\n\n");
                }
                Tag::Emphasis => plain_text.push('*'),
                Tag::Strong => plain_text.push_str("**"),
                _ => {}
            },
            Event::End(tag) => match tag {
                TagEnd::Heading(_) => plain_text.push_str("\n\n"),
                TagEnd::Paragraph => plain_text.push('\n'),
                TagEnd::List(_) => {
                    open_lists.pop();
                    plain_text.push('\n');
                }
                TagEnd::Item => {
                    plain_text.push('\n');
                    at_item_start = false;
                }
                TagEnd::CodeBlock => {
                    in_code_block = false;
                    plain_text.push_str("\n\n");
                }
                TagEnd::Emphasis => plain_text.push('*'),
                TagEnd::Strong => plain_text.push_str("**"),
                _ => {}
            },
            Event::Code(code) => {
                plain_text.push('`');
                plain_text.push_str(&code);
                plain_text.push('`');
                at_item_start = false;
            }
            Event::SoftBreak => plain_text.push(' '),
            Event::HardBreak => plain_text.push('\n'),
            _ => {}
        }
    }

    self.create_pdf_from_text(&plain_text, pdf_path)?;
    Ok(())
}
}
+397
View File
@@ -0,0 +1,397 @@
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::env;
use tracing::{debug, info, warn};
/// Security configuration for the MCP server.
///
/// `Default` yields a permissive configuration; `from_env` tightens it from
/// `DOCX_MCP_*` environment variables.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecurityConfig {
/// If true, only allow read-only operations (the set returned by
/// `get_readonly_commands`). Enabled by `DOCX_MCP_READONLY=true`.
pub readonly_mode: bool,
/// Whitelist of allowed commands (if set, only these commands are allowed).
/// Read from the comma-separated `DOCX_MCP_WHITELIST`.
pub command_whitelist: Option<HashSet<String>>,
/// Blacklist of forbidden commands (if set, these commands are blocked).
/// Read from the comma-separated `DOCX_MCP_BLACKLIST`.
pub command_blacklist: Option<HashSet<String>>,
/// Maximum document size in bytes (default: 100MB). Overridden by `DOCX_MCP_MAX_SIZE`.
pub max_document_size: usize,
/// Maximum number of open documents (default: 50). Overridden by `DOCX_MCP_MAX_DOCS`.
pub max_open_documents: usize,
/// Allow external tool usage (LibreOffice, etc.). When false,
/// `is_command_allowed` rejects every command whose name starts with `convert_`.
pub allow_external_tools: bool,
/// Allow network operations (downloading templates, fonts, etc.)
pub allow_network: bool,
/// Sandbox mode - restricts file operations to temp directory only.
/// Enabling it through `DOCX_MCP_SANDBOX=true` also turns off external tools and network.
pub sandbox_mode: bool,
}
impl Default for SecurityConfig {
fn default() -> Self {
Self {
readonly_mode: false,
command_whitelist: None,
command_blacklist: None,
max_document_size: 100 * 1024 * 1024, // 100MB
max_open_documents: 50,
allow_external_tools: true,
allow_network: true,
sandbox_mode: false,
}
}
}
impl SecurityConfig {
/// Load configuration from environment variables
pub fn from_env() -> Self {
let mut config = Self::default();
// Check for readonly mode
if env::var("DOCX_MCP_READONLY").unwrap_or_default() == "true" {
config.readonly_mode = true;
info!("Running in READONLY mode - only viewing operations allowed");
}
// Check for command whitelist
if let Ok(whitelist) = env::var("DOCX_MCP_WHITELIST") {
let commands: HashSet<String> = whitelist
.split(',')
.map(|s| s.trim().to_string())
.collect();
config.command_whitelist = Some(commands.clone());
info!("Command whitelist enabled with {} commands", commands.len());
}
// Check for command blacklist
if let Ok(blacklist) = env::var("DOCX_MCP_BLACKLIST") {
let commands: HashSet<String> = blacklist
.split(',')
.map(|s| s.trim().to_string())
.collect();
config.command_blacklist = Some(commands.clone());
info!("Command blacklist enabled with {} blocked commands", commands.len());
}
// Check for sandbox mode
if env::var("DOCX_MCP_SANDBOX").unwrap_or_default() == "true" {
config.sandbox_mode = true;
config.allow_external_tools = false;
config.allow_network = false;
info!("Running in SANDBOX mode - restricted file operations");
}
// Check for external tools permission
if env::var("DOCX_MCP_NO_EXTERNAL_TOOLS").unwrap_or_default() == "true" {
config.allow_external_tools = false;
info!("External tools disabled");
}
// Check for network permission
if env::var("DOCX_MCP_NO_NETWORK").unwrap_or_default() == "true" {
config.allow_network = false;
info!("Network operations disabled");
}
// Max document size
if let Ok(size) = env::var("DOCX_MCP_MAX_SIZE") {
if let Ok(bytes) = size.parse::<usize>() {
config.max_document_size = bytes;
info!("Max document size set to {} bytes", bytes);
}
}
// Max open documents
if let Ok(max) = env::var("DOCX_MCP_MAX_DOCS") {
if let Ok(count) = max.parse::<usize>() {
config.max_open_documents = count;
info!("Max open documents set to {}", count);
}
}
config
}
/// Check if a command is allowed based on security configuration
pub fn is_command_allowed(&self, command: &str) -> bool {
// First check if it's a readonly command
let readonly_commands = Self::get_readonly_commands();
let is_readonly_command = readonly_commands.contains(command);
// In readonly mode, only allow readonly commands
if self.readonly_mode && !is_readonly_command {
debug!("Command '{}' blocked: readonly mode", command);
return false;
}
// Check whitelist (if set, only whitelisted commands are allowed)
if let Some(ref whitelist) = self.command_whitelist {
if !whitelist.contains(command) {
debug!("Command '{}' blocked: not in whitelist", command);
return false;
}
}
// Check blacklist (if set, blacklisted commands are blocked)
if let Some(ref blacklist) = self.command_blacklist {
if blacklist.contains(command) {
debug!("Command '{}' blocked: in blacklist", command);
return false;
}
}
// Additional checks for specific command categories
if command.starts_with("convert_") && !self.allow_external_tools {
debug!("Command '{}' blocked: external tools disabled", command);
return false;
}
true
}
/// Get list of readonly commands.
///
/// Returns the names of the commands that read-only mode permits: document
/// viewing, analysis, and export commands.
pub fn get_readonly_commands() -> HashSet<&'static str> {
    const READONLY_COMMANDS: &[&str] = &[
        // Document viewing commands
        "open_document",
        "extract_text",
        "get_metadata",
        "list_documents",
        "get_document_info",
        "read_paragraph",
        "read_table",
        "read_section",
        "search_text",
        "get_document_structure",
        "get_styles",
        "get_headers_footers",
        "get_page_count",
        "get_word_count",
        "get_table_of_contents",
        "list_bookmarks",
        "list_hyperlinks",
        "list_comments",
        "list_footnotes",
        "list_endnotes",
        "get_document_properties",
        // Analysis commands
        "analyze_formatting",
        "check_spelling",
        "check_grammar",
        "get_statistics",
        "compare_documents",
        // Export commands (readonly as they don't modify the original)
        "export_to_json",
        "export_to_markdown",
        "export_to_html",
        "create_preview",
    ];

    READONLY_COMMANDS.iter().copied().collect()
}
/// Get list of write commands (for documentation).
///
/// Returns the names of the commands grouped here as creation, content
/// addition, content modification, and whole-document operations.
pub fn get_write_commands() -> HashSet<&'static str> {
    const WRITE_COMMANDS: &[&str] = &[
        // Document creation/modification
        "create_document",
        "save_document",
        "close_document",
        // Content addition
        "add_paragraph",
        "add_heading",
        "add_table",
        "add_list",
        "add_page_break",
        "add_section_break",
        "add_image",
        "add_chart",
        "add_shape",
        "add_hyperlink",
        "add_bookmark",
        "add_footnote",
        "add_endnote",
        "add_comment",
        "add_watermark",
        // Content modification
        "edit_paragraph",
        "delete_paragraph",
        "find_and_replace",
        "update_table",
        "update_style",
        "set_header",
        "set_footer",
        "set_margins",
        "set_page_size",
        "apply_template",
        "apply_style",
        "apply_theme",
        // Document operations
        "merge_documents",
        "split_document",
        "convert_to_pdf",
        "convert_to_images",
        "protect_document",
        "unprotect_document",
        "track_changes",
        "accept_changes",
        "reject_changes",
    ];

    WRITE_COMMANDS.iter().copied().collect()
}
/// Check if a file path is allowed based on sandbox configuration.
///
/// Outside sandbox mode every path is allowed. In sandbox mode the path
/// must resolve to a location inside the canonicalized system temp directory.
///
/// A path that does not exist yet (for example an output file about to be
/// created) is judged by its canonicalized parent directory plus its final
/// component. Something that exists at the path but cannot be resolved,
/// such as a dangling symlink, is rejected. A path with no usable final
/// component is rejected as well.
pub fn is_path_allowed(&self, path: &std::path::Path) -> bool {
    if !self.sandbox_mode {
        return true;
    }

    // In sandbox mode, only allow operations in temp directory
    let sandbox_root = match std::env::temp_dir().canonicalize() {
        Ok(root) => root,
        Err(_) => return false,
    };

    let resolved = match path.canonicalize() {
        Ok(existing) => existing,
        Err(_) => {
            // An entry is present but unresolvable: do not guess where a
            // write through it would land.
            if path.symlink_metadata().is_ok() {
                return false;
            }
            match (path.parent(), path.file_name()) {
                (Some(parent), Some(name)) => {
                    // A bare file name has an empty parent, meaning the current directory.
                    let parent = if parent.as_os_str().is_empty() {
                        std::path::Path::new(".")
                    } else {
                        parent
                    };
                    match parent.canonicalize() {
                        Ok(dir) => dir.join(name),
                        Err(_) => return false,
                    }
                }
                _ => return false,
            }
        }
    };

    resolved.starts_with(&sandbox_root)
}
/// Get a summary of current security settings
pub fn get_summary(&self) -> String {
let mut summary = Vec::new();
if self.readonly_mode {
summary.push("📖 READONLY MODE");
}
if self.sandbox_mode {
summary.push("🔒 SANDBOX MODE");
}
if let Some(ref whitelist) = self.command_whitelist {
summary.push(&format!("✅ Whitelist: {} commands", whitelist.len()));
}
if let Some(ref blacklist) = self.command_blacklist {
summary.push(&format!("🚫 Blacklist: {} commands", blacklist.len()));
}
if !self.allow_external_tools {
summary.push("🔧 No external tools");
}
if !self.allow_network {
summary.push("🌐 No network access");
}
if summary.is_empty() {
"Standard mode (all features enabled)".to_string()
} else {
summary.join(" | ")
}
}
}
/// Security middleware to check commands before execution
///
/// Owns a [`SecurityConfig`] and applies it to each command passed to
/// [`SecurityMiddleware::check_command`].
pub struct SecurityMiddleware {
    /// Security settings consulted by every check this middleware performs.
    config: SecurityConfig,
}
impl SecurityMiddleware {
    /// Create a middleware that enforces the given configuration.
    pub fn new(config: SecurityConfig) -> Self {
        Self { config }
    }

    /// Check if a command should be allowed to execute
    ///
    /// The checks run in this order and stop at the first failure:
    ///
    /// 1. The command name must be accepted by the configuration.
    /// 2. In sandbox mode, every path-like string in `arguments` must be
    ///    an allowed path.
    /// 3. For `open_document`, the file named by the `path` argument must
    ///    not exceed the configured maximum document size.
    ///
    /// # Errors
    ///
    /// Returns [`SecurityError::CommandNotAllowed`],
    /// [`SecurityError::PathNotAllowed`] or [`SecurityError::FileTooLarge`]
    /// for the corresponding failed check.
    pub fn check_command(&self, command: &str, arguments: &serde_json::Value) -> Result<(), SecurityError> {
        if !self.config.is_command_allowed(command) {
            return Err(SecurityError::CommandNotAllowed(command.to_string()));
        }

        if self.config.sandbox_mode {
            self.check_paths_in_arguments(arguments)?;
        }

        // Only `open_document` is size-checked; the limit applies to a file
        // that already exists on disk.
        if command == "open_document" {
            if let Some(path) = arguments.get("path").and_then(|v| v.as_str()) {
                self.check_file_size(path)?;
            }
        }

        Ok(())
    }

    /// Recursively validate every path-like string inside `arguments`.
    ///
    /// A string is treated as a path when it contains `/` or `\`. Objects
    /// and arrays are walked depth-first; other JSON values are ignored.
    fn check_paths_in_arguments(&self, arguments: &serde_json::Value) -> Result<(), SecurityError> {
        match arguments {
            serde_json::Value::String(s) => {
                let looks_like_path = s.contains('/') || s.contains('\\');
                if looks_like_path && !self.config.is_path_allowed(std::path::Path::new(s)) {
                    return Err(SecurityError::PathNotAllowed(s.to_string()));
                }
                Ok(())
            }
            serde_json::Value::Object(map) => map
                .values()
                .try_for_each(|value| self.check_paths_in_arguments(value)),
            serde_json::Value::Array(arr) => arr
                .iter()
                .try_for_each(|value| self.check_paths_in_arguments(value)),
            _ => Ok(()),
        }
    }

    /// Reject files larger than the configured maximum document size.
    ///
    /// If the file's metadata cannot be read the check passes.
    fn check_file_size(&self, path: &str) -> Result<(), SecurityError> {
        let metadata = match std::fs::metadata(std::path::Path::new(path)) {
            Ok(metadata) => metadata,
            Err(_) => return Ok(()),
        };

        // `len()` is a u64. A plain `as usize` cast would wrap on 32-bit
        // targets and could let an oversized file slip under the limit, so
        // saturate instead.
        let size = <usize as std::convert::TryFrom<u64>>::try_from(metadata.len())
            .unwrap_or(usize::MAX);

        if size > self.config.max_document_size {
            return Err(SecurityError::FileTooLarge {
                size,
                max_size: self.config.max_document_size,
            });
        }
        Ok(())
    }
}
/// Reasons the security layer can refuse an operation.
#[derive(Debug, thiserror::Error)]
pub enum SecurityError {
    /// The configuration rejected the command; carries the command name.
    #[error("Command not allowed: {0}")]
    CommandNotAllowed(String),
    /// A path-like argument was rejected while sandbox mode is active;
    /// carries the offending string.
    #[error("Path not allowed in sandbox mode: {0}")]
    PathNotAllowed(String),
    /// A file exceeded the configured maximum document size. `size` is the
    /// file's length and `max_size` the configured limit, both in bytes.
    #[error("File too large: {size} bytes (max: {max_size} bytes)")]
    FileTooLarge { size: usize, max_size: usize },
    /// NOTE(review): not constructed in this part of the file; presumably
    /// raised where open documents are tracked — confirm against callers.
    #[error("Maximum number of open documents exceeded")]
    TooManyDocuments,
    /// NOTE(review): not constructed in this part of the file; presumably
    /// tied to the `allow_external_tools` setting — confirm against callers.
    #[error("Operation requires external tools which are disabled")]
    ExternalToolsDisabled,
    /// NOTE(review): not constructed in this part of the file; presumably
    /// tied to the `allow_network` setting — confirm against callers.
    #[error("Operation requires network access which is disabled")]
    NetworkDisabled,
}