Initial Commit
This commit is contained in:
@@ -0,0 +1,868 @@
|
||||
use anyhow::{Context, Result};
|
||||
use docx_rs::*;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
use base64;
|
||||
|
||||
/// Advanced DOCX manipulation features.
///
/// The handler carries no state (it is a unit struct); every method takes the
/// document to modify by value and returns the updated document.
#[derive(Debug, Clone, Copy, Default)]
pub struct AdvancedDocxHandler;
|
||||
|
||||
impl AdvancedDocxHandler {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Create a document with professional template
|
||||
pub fn create_from_template(&self, template_type: DocumentTemplate) -> Result<Docx> {
|
||||
let mut docx = Docx::new();
|
||||
|
||||
match template_type {
|
||||
DocumentTemplate::BusinessLetter => {
|
||||
docx = self.apply_business_letter_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Resume => {
|
||||
docx = self.apply_resume_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Report => {
|
||||
docx = self.apply_report_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Invoice => {
|
||||
docx = self.apply_invoice_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Contract => {
|
||||
docx = self.apply_contract_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Memo => {
|
||||
docx = self.apply_memo_template(docx)?;
|
||||
}
|
||||
DocumentTemplate::Newsletter => {
|
||||
docx = self.apply_newsletter_template(docx)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Add a table of contents
|
||||
pub fn add_table_of_contents(&self, docx: Docx) -> Result<Docx> {
|
||||
let toc = TableOfContents::new()
|
||||
.heading_text("Table of Contents")
|
||||
.heading_style("TOCHeading");
|
||||
|
||||
let mut docx = docx.add_table_of_contents(toc);
|
||||
|
||||
// Add instruction text
|
||||
let instruction = Paragraph::new()
|
||||
.add_run(
|
||||
Run::new()
|
||||
.add_text("Right-click and select 'Update Field' to refresh the table of contents")
|
||||
.italic()
|
||||
.size(20)
|
||||
.color("808080")
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(instruction);
|
||||
docx = docx.add_paragraph(Paragraph::new().add_run(Run::new().add_break(BreakType::Page)));
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Add an image to the document
|
||||
pub fn add_image(
|
||||
&self,
|
||||
docx: Docx,
|
||||
image_data: &[u8],
|
||||
width_px: u32,
|
||||
height_px: u32,
|
||||
alt_text: Option<&str>
|
||||
) -> Result<Docx> {
|
||||
// Convert pixels to EMUs (English Metric Units)
|
||||
// 1 pixel = 9525 EMUs
|
||||
let width_emu = width_px * 9525;
|
||||
let height_emu = height_px * 9525;
|
||||
|
||||
let drawing = Drawing::new()
|
||||
.inline(
|
||||
Inline::new()
|
||||
.extent(width_emu, height_emu)
|
||||
.graphic(
|
||||
Graphic::new()
|
||||
.graphic_data(
|
||||
GraphicData::new()
|
||||
.pic(
|
||||
Pic::new()
|
||||
.blip_fill(image_data.to_vec())
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_run(Run::new().add_drawing(drawing));
|
||||
|
||||
Ok(docx.add_paragraph(paragraph))
|
||||
}
|
||||
|
||||
/// Add a chart to the document
|
||||
pub fn add_chart(&self, docx: Docx, chart_type: ChartType, data: ChartData) -> Result<Docx> {
|
||||
// Charts in DOCX are complex and usually require embedding Excel data
|
||||
// For now, we'll create a table representation
|
||||
let mut table = Table::new(vec![]);
|
||||
|
||||
// Add headers
|
||||
let mut header_cells = vec![TableCell::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text("Category").bold())
|
||||
)];
|
||||
|
||||
for series in &data.series {
|
||||
header_cells.push(
|
||||
TableCell::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text(&series.name).bold())
|
||||
)
|
||||
);
|
||||
}
|
||||
table = table.add_row(TableRow::new(header_cells));
|
||||
|
||||
// Add data rows
|
||||
for (i, category) in data.categories.iter().enumerate() {
|
||||
let mut row_cells = vec![TableCell::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text(category))
|
||||
)];
|
||||
|
||||
for series in &data.series {
|
||||
if let Some(value) = series.values.get(i) {
|
||||
row_cells.push(
|
||||
TableCell::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text(&value.to_string()))
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
table = table.add_row(TableRow::new(row_cells));
|
||||
}
|
||||
|
||||
// Add title for the chart
|
||||
let title = Paragraph::new()
|
||||
.add_run(Run::new().add_text(&format!("{:?}: {}", chart_type, data.title)).bold())
|
||||
.align(AlignmentType::Center);
|
||||
|
||||
Ok(docx.add_paragraph(title).add_table(table))
|
||||
}
|
||||
|
||||
/// Add a hyperlink
|
||||
pub fn add_hyperlink(&self, docx: Docx, text: &str, url: &str) -> Result<Docx> {
|
||||
let hyperlink = Hyperlink::new(url, HyperlinkType::External)
|
||||
.add_run(Run::new().add_text(text).color("0000FF").underline("single"));
|
||||
|
||||
let paragraph = Paragraph::new().add_hyperlink(hyperlink);
|
||||
|
||||
Ok(docx.add_paragraph(paragraph))
|
||||
}
|
||||
|
||||
/// Add a bookmark
|
||||
pub fn add_bookmark(&self, docx: Docx, bookmark_name: &str, text: &str) -> Result<Docx> {
|
||||
let bookmark_id = Uuid::new_v4().to_string();
|
||||
|
||||
let bookmark_start = BookmarkStart::new(&bookmark_id, bookmark_name);
|
||||
let bookmark_end = BookmarkEnd::new(&bookmark_id);
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_bookmark_start(bookmark_start)
|
||||
.add_run(Run::new().add_text(text))
|
||||
.add_bookmark_end(bookmark_end);
|
||||
|
||||
Ok(docx.add_paragraph(paragraph))
|
||||
}
|
||||
|
||||
/// Add a cross-reference
|
||||
pub fn add_cross_reference(&self, docx: Docx, bookmark_name: &str, display_text: &str) -> Result<Docx> {
|
||||
// Cross-references in DOCX use field codes
|
||||
let field = ComplexField::new()
|
||||
.instruction(&format!("REF {} \\h", bookmark_name))
|
||||
.default_text(display_text);
|
||||
|
||||
let paragraph = Paragraph::new().add_complex_field(field);
|
||||
|
||||
Ok(docx.add_paragraph(paragraph))
|
||||
}
|
||||
|
||||
/// Add document properties and metadata
|
||||
pub fn set_document_properties(&self, docx: Docx, properties: DocumentProperties) -> Result<Docx> {
|
||||
let docx = docx
|
||||
.title(&properties.title)
|
||||
.subject(&properties.subject)
|
||||
.creator(&properties.author)
|
||||
.keywords(&properties.keywords.join(", "))
|
||||
.description(&properties.description);
|
||||
|
||||
if let Some(company) = properties.company {
|
||||
docx.company(&company);
|
||||
}
|
||||
|
||||
if let Some(manager) = properties.manager {
|
||||
docx.manager(&manager);
|
||||
}
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Add a custom styled section
|
||||
pub fn add_section(&self, docx: Docx, section_config: SectionConfig) -> Result<Docx> {
|
||||
let mut section = SectionProperty::new();
|
||||
|
||||
// Page size
|
||||
match section_config.page_size {
|
||||
PageSize::A4 => {
|
||||
section = section.page_size(11906, 16838); // A4 in twips
|
||||
}
|
||||
PageSize::Letter => {
|
||||
section = section.page_size(12240, 15840); // Letter in twips
|
||||
}
|
||||
PageSize::Legal => {
|
||||
section = section.page_size(12240, 20160); // Legal in twips
|
||||
}
|
||||
PageSize::A3 => {
|
||||
section = section.page_size(16838, 23811); // A3 in twips
|
||||
}
|
||||
}
|
||||
|
||||
// Orientation
|
||||
if section_config.landscape {
|
||||
section = section.page_size(
|
||||
section.page_size.1,
|
||||
section.page_size.0
|
||||
);
|
||||
}
|
||||
|
||||
// Margins (convert mm to twips: 1mm = 56.7 twips)
|
||||
section = section.page_margin(
|
||||
PageMargin::new()
|
||||
.top((section_config.margins.top * 56.7) as i32)
|
||||
.bottom((section_config.margins.bottom * 56.7) as i32)
|
||||
.left((section_config.margins.left * 56.7) as i32)
|
||||
.right((section_config.margins.right * 56.7) as i32)
|
||||
.header((section_config.margins.header * 56.7) as i32)
|
||||
.footer((section_config.margins.footer * 56.7) as i32)
|
||||
);
|
||||
|
||||
// Columns
|
||||
if section_config.columns > 1 {
|
||||
section = section.columns(section_config.columns);
|
||||
}
|
||||
|
||||
Ok(docx.add_section(section))
|
||||
}
|
||||
|
||||
/// Add a watermark
|
||||
pub fn add_watermark(&self, docx: Docx, text: &str, style: WatermarkStyle) -> Result<Docx> {
|
||||
let watermark = match style {
|
||||
WatermarkStyle::Diagonal => {
|
||||
Run::new()
|
||||
.add_text(text)
|
||||
.size(144) // Large size
|
||||
.color("C0C0C0") // Light gray
|
||||
.bold()
|
||||
}
|
||||
WatermarkStyle::Horizontal => {
|
||||
Run::new()
|
||||
.add_text(text)
|
||||
.size(100)
|
||||
.color("E0E0E0")
|
||||
}
|
||||
};
|
||||
|
||||
// Watermarks are typically added to headers
|
||||
let header = Header::new().add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(watermark)
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
Ok(docx.header(header))
|
||||
}
|
||||
|
||||
/// Add footnote
|
||||
pub fn add_footnote(&self, docx: Docx, reference_text: &str, footnote_text: &str) -> Result<Docx> {
|
||||
let footnote_id = Uuid::new_v4().to_string();
|
||||
|
||||
let footnote = Footnote::new(&footnote_id)
|
||||
.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text(footnote_text))
|
||||
);
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_run(Run::new().add_text(reference_text))
|
||||
.add_footnote_reference(&footnote_id);
|
||||
|
||||
Ok(docx.add_paragraph(paragraph).add_footnote(footnote))
|
||||
}
|
||||
|
||||
/// Add endnote
|
||||
pub fn add_endnote(&self, docx: Docx, reference_text: &str, endnote_text: &str) -> Result<Docx> {
|
||||
let endnote_id = Uuid::new_v4().to_string();
|
||||
|
||||
let endnote = Endnote::new(&endnote_id)
|
||||
.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text(endnote_text))
|
||||
);
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_run(Run::new().add_text(reference_text))
|
||||
.add_endnote_reference(&endnote_id);
|
||||
|
||||
Ok(docx.add_paragraph(paragraph).add_endnote(endnote))
|
||||
}
|
||||
|
||||
/// Add custom styles
|
||||
pub fn add_custom_style(&self, docx: Docx, style: CustomStyle) -> Result<Docx> {
|
||||
let style_def = Style::new(&style.id, StyleType::Paragraph)
|
||||
.name(&style.name)
|
||||
.based_on(&style.based_on.unwrap_or_else(|| "Normal".to_string()));
|
||||
|
||||
let mut paragraph_property = ParagraphProperty::new();
|
||||
|
||||
if let Some(spacing) = style.spacing {
|
||||
paragraph_property = paragraph_property
|
||||
.line_spacing(LineSpacing::new(SpacingType::Auto, spacing.before, spacing.after));
|
||||
}
|
||||
|
||||
if let Some(indent) = style.indent {
|
||||
paragraph_property = paragraph_property
|
||||
.indent(Some(indent.left), Some(indent.right), Some(indent.first_line), None);
|
||||
}
|
||||
|
||||
let mut run_property = RunProperty::new();
|
||||
|
||||
if let Some(font) = style.font {
|
||||
run_property = run_property.fonts(RunFonts::new().ascii(&font).east_asia(&font));
|
||||
}
|
||||
|
||||
if let Some(size) = style.size {
|
||||
run_property = run_property.size(size);
|
||||
}
|
||||
|
||||
if style.bold {
|
||||
run_property = run_property.bold();
|
||||
}
|
||||
|
||||
if style.italic {
|
||||
run_property = run_property.italic();
|
||||
}
|
||||
|
||||
if let Some(color) = style.color {
|
||||
run_property = run_property.color(&color);
|
||||
}
|
||||
|
||||
let style_def = style_def
|
||||
.paragraph_property(paragraph_property)
|
||||
.run_property(run_property);
|
||||
|
||||
Ok(docx.add_style(style_def))
|
||||
}
|
||||
|
||||
/// Mail merge functionality
|
||||
pub fn prepare_mail_merge_template(&self, docx: Docx, fields: Vec<String>) -> Result<Docx> {
|
||||
let mut docx = docx;
|
||||
|
||||
for field in fields {
|
||||
let merge_field = ComplexField::new()
|
||||
.instruction(&format!("MERGEFIELD {} \\* MERGEFORMAT", field))
|
||||
.default_text(&format!("«{}»", field));
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_complex_field(merge_field);
|
||||
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
}
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
/// Add comments (annotations)
|
||||
pub fn add_comment(&self, docx: Docx, text: &str, comment: &str, author: &str) -> Result<Docx> {
|
||||
let comment_id = Uuid::new_v4().to_string();
|
||||
let date = Utc::now();
|
||||
|
||||
let comment_obj = Comment::new(&comment_id, author)
|
||||
.date(date)
|
||||
.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text(comment))
|
||||
);
|
||||
|
||||
let comment_range_start = CommentRangeStart::new(&comment_id);
|
||||
let comment_range_end = CommentRangeEnd::new(&comment_id);
|
||||
let comment_reference = CommentReference::new(&comment_id);
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_comment_range_start(comment_range_start)
|
||||
.add_run(Run::new().add_text(text))
|
||||
.add_comment_range_end(comment_range_end)
|
||||
.add_run(Run::new().add_comment_reference(comment_reference));
|
||||
|
||||
Ok(docx.add_paragraph(paragraph).add_comment(comment_obj))
|
||||
}
|
||||
|
||||
// Template helper methods
|
||||
|
||||
fn apply_business_letter_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Add sender info placeholder
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Your Name]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Your Address]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[City, State ZIP]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Your Email]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Your Phone]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Date
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Date]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Recipient info
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Recipient Name]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Title]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Company]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[Address]"))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_text("[City, State ZIP]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Salutation
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("Dear [Recipient Name]:"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Body placeholder
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Letter body paragraph 1]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Letter body paragraph 2]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Letter body paragraph 3]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Closing
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("Sincerely,"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Your Name]"))
|
||||
);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_resume_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Name header
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[YOUR NAME]").size(32).bold())
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
// Contact info
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Email] | [Phone] | [LinkedIn] | [Location]").size(22))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new().add_run(Run::new().add_text("").size(12)));
|
||||
|
||||
// Professional Summary
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("PROFESSIONAL SUMMARY").size(24).bold())
|
||||
.style("Heading2")
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[2-3 lines summarizing your experience and key skills]"))
|
||||
);
|
||||
|
||||
// Experience
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("EXPERIENCE").size(24).bold())
|
||||
.style("Heading2")
|
||||
);
|
||||
|
||||
// Education
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("EDUCATION").size(24).bold())
|
||||
.style("Heading2")
|
||||
);
|
||||
|
||||
// Skills
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("SKILLS").size(24).bold())
|
||||
.style("Heading2")
|
||||
);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_report_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Title page
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text(""))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[REPORT TITLE]").size(36).bold())
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Subtitle or Description]").size(24))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
.add_run(Run::new().add_break(BreakType::TextWrapping))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("Prepared by:").size(20))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Author Name]").size(20))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Date]").size(20))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
// Page break
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_break(BreakType::Page))
|
||||
);
|
||||
|
||||
// Table of Contents placeholder
|
||||
docx = self.add_table_of_contents(docx)?;
|
||||
|
||||
// Executive Summary
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("Executive Summary").size(28).bold())
|
||||
.style("Heading1")
|
||||
);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_invoice_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Company header
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[COMPANY NAME]").size(32).bold())
|
||||
.align(AlignmentType::Right)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("INVOICE").size(28).bold())
|
||||
.align(AlignmentType::Right)
|
||||
);
|
||||
|
||||
// Invoice details table
|
||||
let invoice_info = Table::new(vec![
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Invoice #:"))),
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[INV-0001]"))),
|
||||
])
|
||||
.add_row(TableRow::new(vec![
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Date:"))),
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[Date]"))),
|
||||
]))
|
||||
.add_row(TableRow::new(vec![
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("Due Date:"))),
|
||||
TableCell::new().add_paragraph(Paragraph::new().add_run(Run::new().add_text("[Due Date]"))),
|
||||
]));
|
||||
|
||||
docx = docx.add_table(invoice_info);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_contract_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Contract title
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[CONTRACT TYPE] AGREEMENT").size(28).bold())
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Parties
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("This Agreement is entered into as of [Date] between:"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Party 1 Name], a [Entity Type] (\"Party 1\")"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("and"))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Party 2 Name], a [Entity Type] (\"Party 2\")"))
|
||||
);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_memo_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Memo header
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("MEMORANDUM").size(24).bold())
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(Paragraph::new());
|
||||
|
||||
// Memo fields
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("TO: ").bold())
|
||||
.add_run(Run::new().add_text("[Recipient(s)]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("FROM: ").bold())
|
||||
.add_run(Run::new().add_text("[Sender]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("DATE: ").bold())
|
||||
.add_run(Run::new().add_text("[Date]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("SUBJECT: ").bold())
|
||||
.add_run(Run::new().add_text("[Subject]"))
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("_").repeat(70))
|
||||
);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
|
||||
fn apply_newsletter_template(&self, mut docx: Docx) -> Result<Docx> {
|
||||
// Newsletter header
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[NEWSLETTER TITLE]").size(36).bold())
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
docx = docx.add_paragraph(
|
||||
Paragraph::new()
|
||||
.add_run(Run::new().add_text("[Issue #] | [Date]").size(18))
|
||||
.align(AlignmentType::Center)
|
||||
);
|
||||
|
||||
// Two-column layout simulation
|
||||
let columns = SectionProperty::new().columns(2);
|
||||
docx = docx.add_section(columns);
|
||||
|
||||
Ok(docx)
|
||||
}
|
||||
}
|
||||
|
||||
// Supporting types
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum DocumentTemplate {
|
||||
BusinessLetter,
|
||||
Resume,
|
||||
Report,
|
||||
Invoice,
|
||||
Contract,
|
||||
Memo,
|
||||
Newsletter,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DocumentProperties {
|
||||
pub title: String,
|
||||
pub subject: String,
|
||||
pub author: String,
|
||||
pub keywords: Vec<String>,
|
||||
pub description: String,
|
||||
pub company: Option<String>,
|
||||
pub manager: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SectionConfig {
|
||||
pub page_size: PageSize,
|
||||
pub landscape: bool,
|
||||
pub margins: Margins,
|
||||
pub columns: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum PageSize {
|
||||
A4,
|
||||
Letter,
|
||||
Legal,
|
||||
A3,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Margins {
|
||||
pub top: f32,
|
||||
pub bottom: f32,
|
||||
pub left: f32,
|
||||
pub right: f32,
|
||||
pub header: f32,
|
||||
pub footer: f32,
|
||||
}
|
||||
|
||||
impl Default for Margins {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
top: 25.4, // 1 inch in mm
|
||||
bottom: 25.4,
|
||||
left: 25.4,
|
||||
right: 25.4,
|
||||
header: 12.7, // 0.5 inch
|
||||
footer: 12.7,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ChartType {
|
||||
Bar,
|
||||
Column,
|
||||
Line,
|
||||
Pie,
|
||||
Area,
|
||||
Scatter,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChartData {
|
||||
pub title: String,
|
||||
pub categories: Vec<String>,
|
||||
pub series: Vec<ChartSeries>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChartSeries {
|
||||
pub name: String,
|
||||
pub values: Vec<f64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum WatermarkStyle {
|
||||
Diagonal,
|
||||
Horizontal,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CustomStyle {
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub based_on: Option<String>,
|
||||
pub font: Option<String>,
|
||||
pub size: Option<usize>,
|
||||
pub bold: bool,
|
||||
pub italic: bool,
|
||||
pub color: Option<String>,
|
||||
pub spacing: Option<StyleSpacing>,
|
||||
pub indent: Option<StyleIndent>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StyleSpacing {
|
||||
pub before: i32,
|
||||
pub after: i32,
|
||||
pub line: f32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StyleIndent {
|
||||
pub left: i32,
|
||||
pub right: i32,
|
||||
pub first_line: i32,
|
||||
}
|
||||
@@ -0,0 +1,435 @@
|
||||
use anyhow::{Context, Result};
|
||||
use image::{DynamicImage, ImageFormat, Rgba, RgbaImage};
|
||||
use printpdf::*;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufWriter, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use tempfile::NamedTempFile;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::pure_converter::PureRustConverter;
|
||||
|
||||
pub struct DocumentConverter {
|
||||
pure_converter: PureRustConverter,
|
||||
prefer_external_tools: bool,
|
||||
}
|
||||
|
||||
impl DocumentConverter {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
pure_converter: PureRustConverter::new(),
|
||||
prefer_external_tools: false, // Default to pure Rust implementation
|
||||
}
|
||||
}
|
||||
|
||||
pub fn docx_to_pdf(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
|
||||
if self.prefer_external_tools {
|
||||
// Try external tools first if preferred
|
||||
// Method 1: Try LibreOffice if available
|
||||
if self.try_libreoffice_conversion(docx_path, pdf_path).is_ok() {
|
||||
info!("Successfully converted DOCX to PDF using LibreOffice");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Method 2: Try unoconv if available
|
||||
if self.try_unoconv_conversion(docx_path, pdf_path).is_ok() {
|
||||
info!("Successfully converted DOCX to PDF using unoconv");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
// Use pure Rust implementation (default)
|
||||
self.pure_converter.docx_to_pdf_pure(docx_path, pdf_path)?;
|
||||
info!("Successfully converted DOCX to PDF using pure Rust implementation");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn try_libreoffice_conversion(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
|
||||
let output = Command::new("libreoffice")
|
||||
.args(&[
|
||||
"--headless",
|
||||
"--invisible",
|
||||
"--nodefault",
|
||||
"--nolockcheck",
|
||||
"--nologo",
|
||||
"--norestore",
|
||||
"--convert-to",
|
||||
"pdf",
|
||||
"--outdir",
|
||||
pdf_path.parent().unwrap().to_str().unwrap(),
|
||||
docx_path.to_str().unwrap(),
|
||||
])
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(output) if output.status.success() => {
|
||||
// LibreOffice creates the PDF with the same base name
|
||||
let temp_pdf = pdf_path.parent().unwrap()
|
||||
.join(docx_path.file_stem().unwrap())
|
||||
.with_extension("pdf");
|
||||
|
||||
if temp_pdf != pdf_path {
|
||||
fs::rename(&temp_pdf, pdf_path)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Ok(output) => {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("LibreOffice conversion failed: {}", stderr)
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("LibreOffice not available: {}", e);
|
||||
anyhow::bail!("LibreOffice not available")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn try_unoconv_conversion(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
|
||||
let output = Command::new("unoconv")
|
||||
.args(&[
|
||||
"-f", "pdf",
|
||||
"-o", pdf_path.to_str().unwrap(),
|
||||
docx_path.to_str().unwrap(),
|
||||
])
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(output) if output.status.success() => Ok(()),
|
||||
Ok(output) => {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("unoconv conversion failed: {}", stderr)
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("unoconv not available: {}", e);
|
||||
anyhow::bail!("unoconv not available")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn basic_docx_to_pdf(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
|
||||
// Extract text from DOCX
|
||||
let text = dotext::extract_text(docx_path)
|
||||
.with_context(|| format!("Failed to extract text from {:?}", docx_path))?;
|
||||
|
||||
// Create a basic PDF with the extracted text
|
||||
let (doc, page1, layer1) = PdfDocument::new("Document", Mm(210.0), Mm(297.0), "Layer 1");
|
||||
let current_layer = doc.get_page(page1).get_layer(layer1);
|
||||
|
||||
// Load a basic font
|
||||
let font = doc.add_builtin_font(BuiltinFont::Helvetica)?;
|
||||
|
||||
// Split text into lines and add to PDF
|
||||
let lines: Vec<&str> = text.text.lines().collect();
|
||||
let mut y_position = Mm(280.0);
|
||||
let line_height = Mm(5.0);
|
||||
|
||||
for line in lines {
|
||||
if y_position < Mm(20.0) {
|
||||
// Add new page if needed
|
||||
let (page, layer) = doc.add_page(Mm(210.0), Mm(297.0), "Page layer");
|
||||
let current_layer = doc.get_page(page).get_layer(layer);
|
||||
y_position = Mm(280.0);
|
||||
}
|
||||
|
||||
current_layer.use_text(line, 12.0, Mm(10.0), y_position, &font);
|
||||
y_position -= line_height;
|
||||
}
|
||||
|
||||
doc.save(&mut BufWriter::new(File::create(pdf_path)?))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn pdf_to_images(
|
||||
&self,
|
||||
pdf_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
dpi: u32,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
// Try multiple methods for PDF to image conversion
|
||||
|
||||
// Method 1: Try pdftoppm if available
|
||||
if let Ok(images) = self.try_pdftoppm_conversion(pdf_path, output_dir, format, dpi) {
|
||||
info!("Successfully converted PDF to images using pdftoppm");
|
||||
return Ok(images);
|
||||
}
|
||||
|
||||
// Method 2: Try ImageMagick if available
|
||||
if let Ok(images) = self.try_imagemagick_conversion(pdf_path, output_dir, format, dpi) {
|
||||
info!("Successfully converted PDF to images using ImageMagick");
|
||||
return Ok(images);
|
||||
}
|
||||
|
||||
// Method 3: Try Ghostscript if available
|
||||
if let Ok(images) = self.try_ghostscript_conversion(pdf_path, output_dir, format, dpi) {
|
||||
info!("Successfully converted PDF to images using Ghostscript");
|
||||
return Ok(images);
|
||||
}
|
||||
|
||||
anyhow::bail!("No PDF to image converter available. Please install pdftoppm, ImageMagick, or Ghostscript")
|
||||
}
|
||||
|
||||
fn try_pdftoppm_conversion(
|
||||
&self,
|
||||
pdf_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
dpi: u32,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
let output_prefix = output_dir.join("page");
|
||||
let format_arg = match format {
|
||||
ImageFormat::Png => "-png",
|
||||
ImageFormat::Jpeg => "-jpeg",
|
||||
_ => "-png",
|
||||
};
|
||||
|
||||
let output = Command::new("pdftoppm")
|
||||
.args(&[
|
||||
format_arg,
|
||||
"-r", &dpi.to_string(),
|
||||
pdf_path.to_str().unwrap(),
|
||||
output_prefix.to_str().unwrap(),
|
||||
])
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("pdftoppm failed: {}", stderr);
|
||||
}
|
||||
|
||||
// Collect generated image files
|
||||
let extension = match format {
|
||||
ImageFormat::Png => "png",
|
||||
ImageFormat::Jpeg => "jpg",
|
||||
_ => "png",
|
||||
};
|
||||
|
||||
let mut images = Vec::new();
|
||||
for entry in fs::read_dir(output_dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if path.extension() == Some(std::ffi::OsStr::new(extension)) {
|
||||
images.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
images.sort();
|
||||
Ok(images)
|
||||
}
|
||||
|
||||
fn try_imagemagick_conversion(
|
||||
&self,
|
||||
pdf_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
dpi: u32,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
let extension = match format {
|
||||
ImageFormat::Png => "png",
|
||||
ImageFormat::Jpeg => "jpg",
|
||||
_ => "png",
|
||||
};
|
||||
|
||||
let output_pattern = output_dir.join(format!("page-%03d.{}", extension));
|
||||
|
||||
let output = Command::new("convert")
|
||||
.args(&[
|
||||
"-density", &dpi.to_string(),
|
||||
pdf_path.to_str().unwrap(),
|
||||
"-quality", "100",
|
||||
output_pattern.to_str().unwrap(),
|
||||
])
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("ImageMagick convert failed: {}", stderr);
|
||||
}
|
||||
|
||||
// Collect generated image files
|
||||
let mut images = Vec::new();
|
||||
for entry in fs::read_dir(output_dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if path.extension() == Some(std::ffi::OsStr::new(extension)) {
|
||||
images.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
images.sort();
|
||||
Ok(images)
|
||||
}
|
||||
|
||||
fn try_ghostscript_conversion(
|
||||
&self,
|
||||
pdf_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
dpi: u32,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
let device = match format {
|
||||
ImageFormat::Png => "png16m",
|
||||
ImageFormat::Jpeg => "jpeg",
|
||||
_ => "png16m",
|
||||
};
|
||||
|
||||
let extension = match format {
|
||||
ImageFormat::Png => "png",
|
||||
ImageFormat::Jpeg => "jpg",
|
||||
_ => "png",
|
||||
};
|
||||
|
||||
let output_pattern = output_dir.join(format!("page-%03d.{}", extension));
|
||||
|
||||
let output = Command::new("gs")
|
||||
.args(&[
|
||||
"-dNOPAUSE",
|
||||
"-dBATCH",
|
||||
"-sDEVICE", device,
|
||||
&format!("-r{}", dpi),
|
||||
"-dTextAlphaBits=4",
|
||||
"-dGraphicsAlphaBits=4",
|
||||
&format!("-sOutputFile={}", output_pattern.to_str().unwrap()),
|
||||
pdf_path.to_str().unwrap(),
|
||||
])
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("Ghostscript failed: {}", stderr);
|
||||
}
|
||||
|
||||
// Collect generated image files
|
||||
let mut images = Vec::new();
|
||||
for entry in fs::read_dir(output_dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if path.extension() == Some(std::ffi::OsStr::new(extension)) {
|
||||
images.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
images.sort();
|
||||
Ok(images)
|
||||
}
|
||||
|
||||
pub fn docx_to_images(
|
||||
&self,
|
||||
docx_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
dpi: u32,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
// First convert DOCX to PDF
|
||||
let temp_pdf = NamedTempFile::new()?.into_temp_path();
|
||||
self.docx_to_pdf(docx_path, &temp_pdf)?;
|
||||
|
||||
// Then convert PDF to images
|
||||
let images = self.pdf_to_images(&temp_pdf, output_dir, format, dpi)?;
|
||||
|
||||
Ok(images)
|
||||
}
|
||||
|
||||
pub fn create_thumbnail(
|
||||
&self,
|
||||
image_path: &Path,
|
||||
output_path: &Path,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<()> {
|
||||
let img = image::open(image_path)
|
||||
.with_context(|| format!("Failed to open image {:?}", image_path))?;
|
||||
|
||||
let thumbnail = img.thumbnail(width, height);
|
||||
thumbnail.save(output_path)
|
||||
.with_context(|| format!("Failed to save thumbnail to {:?}", output_path))?;
|
||||
|
||||
info!("Created thumbnail {}x{} at {:?}", width, height, output_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn merge_pdfs(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
|
||||
// Try using pdftk if available
|
||||
if self.try_pdftk_merge(pdf_paths, output_path).is_ok() {
|
||||
info!("Successfully merged PDFs using pdftk");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Fallback to lopdf for merging
|
||||
self.merge_pdfs_with_lopdf(pdf_paths, output_path)?;
|
||||
info!("Successfully merged PDFs using lopdf");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn try_pdftk_merge(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
|
||||
let mut args = Vec::new();
|
||||
for path in pdf_paths {
|
||||
args.push(path.to_str().unwrap());
|
||||
}
|
||||
args.push("cat");
|
||||
args.push("output");
|
||||
args.push(output_path.to_str().unwrap());
|
||||
|
||||
let output = Command::new("pdftk")
|
||||
.args(&args)
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("pdftk merge failed: {}", stderr);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_pdfs_with_lopdf(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
|
||||
use lopdf::{Document, Object, ObjectId};
|
||||
|
||||
let mut merged = Document::new();
|
||||
merged.version = "1.5".to_string();
|
||||
|
||||
for pdf_path in pdf_paths {
|
||||
let mut doc = Document::load(pdf_path)?;
|
||||
|
||||
// Merge pages
|
||||
for page_id in doc.get_pages().values() {
|
||||
merged.add_object(doc.get_object(*page_id)?.clone());
|
||||
}
|
||||
}
|
||||
|
||||
merged.save(output_path)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn split_pdf(&self, pdf_path: &Path, output_dir: &Path) -> Result<Vec<PathBuf>> {
|
||||
use lopdf::Document;
|
||||
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
let doc = Document::load(pdf_path)?;
|
||||
let pages = doc.get_pages();
|
||||
let mut output_paths = Vec::new();
|
||||
|
||||
for (i, (_, page_id)) in pages.iter().enumerate() {
|
||||
let mut single_page = Document::new();
|
||||
single_page.version = doc.version.clone();
|
||||
|
||||
// Clone the page to the new document
|
||||
single_page.add_object(doc.get_object(*page_id)?.clone());
|
||||
|
||||
let output_path = output_dir.join(format!("page_{:03}.pdf", i + 1));
|
||||
single_page.save(&output_path)?;
|
||||
output_paths.push(output_path);
|
||||
}
|
||||
|
||||
info!("Split PDF into {} pages", output_paths.len());
|
||||
Ok(output_paths)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,408 @@
|
||||
use anyhow::{Context, Result};
|
||||
use docx_rs::*;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tempfile::NamedTempFile;
|
||||
use uuid::Uuid;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DocxMetadata {
|
||||
pub id: String,
|
||||
pub path: PathBuf,
|
||||
pub created_at: DateTime<Utc>,
|
||||
pub modified_at: DateTime<Utc>,
|
||||
pub size_bytes: u64,
|
||||
pub page_count: Option<usize>,
|
||||
pub word_count: Option<usize>,
|
||||
pub author: Option<String>,
|
||||
pub title: Option<String>,
|
||||
pub subject: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DocxStyle {
|
||||
pub font_family: Option<String>,
|
||||
pub font_size: Option<usize>,
|
||||
pub bold: Option<bool>,
|
||||
pub italic: Option<bool>,
|
||||
pub underline: Option<bool>,
|
||||
pub color: Option<String>,
|
||||
pub alignment: Option<String>,
|
||||
pub line_spacing: Option<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TableData {
|
||||
pub rows: Vec<Vec<String>>,
|
||||
pub headers: Option<Vec<String>>,
|
||||
pub border_style: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ImageData {
|
||||
pub data: Vec<u8>,
|
||||
pub width: Option<u32>,
|
||||
pub height: Option<u32>,
|
||||
pub alt_text: Option<String>,
|
||||
}
|
||||
|
||||
pub struct DocxHandler {
|
||||
temp_dir: PathBuf,
|
||||
pub documents: std::collections::HashMap<String, DocxMetadata>,
|
||||
}
|
||||
|
||||
impl DocxHandler {
|
||||
pub fn new() -> Result<Self> {
|
||||
let temp_dir = std::env::temp_dir().join("docx-mcp");
|
||||
fs::create_dir_all(&temp_dir)?;
|
||||
|
||||
Ok(Self {
|
||||
temp_dir,
|
||||
documents: std::collections::HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn new_with_temp_dir(temp_dir: &Path) -> Result<Self> {
|
||||
let temp_dir = temp_dir.to_path_buf();
|
||||
fs::create_dir_all(&temp_dir)?;
|
||||
|
||||
Ok(Self {
|
||||
temp_dir,
|
||||
documents: std::collections::HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn create_document(&mut self) -> Result<String> {
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let doc_path = self.temp_dir.join(format!("{}.docx", doc_id));
|
||||
|
||||
let docx = Docx::new();
|
||||
let file = File::create(&doc_path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
let metadata = DocxMetadata {
|
||||
id: doc_id.clone(),
|
||||
path: doc_path,
|
||||
created_at: Utc::now(),
|
||||
modified_at: Utc::now(),
|
||||
size_bytes: 0,
|
||||
page_count: Some(1),
|
||||
word_count: Some(0),
|
||||
author: None,
|
||||
title: None,
|
||||
subject: None,
|
||||
};
|
||||
|
||||
self.documents.insert(doc_id.clone(), metadata);
|
||||
info!("Created new document with ID: {}", doc_id);
|
||||
|
||||
Ok(doc_id)
|
||||
}
|
||||
|
||||
pub fn open_document(&mut self, path: &Path) -> Result<String> {
|
||||
let doc_id = Uuid::new_v4().to_string();
|
||||
let doc_path = self.temp_dir.join(format!("{}.docx", doc_id));
|
||||
|
||||
fs::copy(path, &doc_path)
|
||||
.with_context(|| format!("Failed to copy document from {:?}", path))?;
|
||||
|
||||
let file_metadata = fs::metadata(&doc_path)?;
|
||||
|
||||
let metadata = DocxMetadata {
|
||||
id: doc_id.clone(),
|
||||
path: doc_path,
|
||||
created_at: Utc::now(),
|
||||
modified_at: Utc::now(),
|
||||
size_bytes: file_metadata.len(),
|
||||
page_count: None,
|
||||
word_count: None,
|
||||
author: None,
|
||||
title: None,
|
||||
subject: None,
|
||||
};
|
||||
|
||||
self.documents.insert(doc_id.clone(), metadata);
|
||||
info!("Opened document from {:?} with ID: {}", path, doc_id);
|
||||
|
||||
Ok(doc_id)
|
||||
}
|
||||
|
||||
pub fn add_paragraph(&mut self, doc_id: &str, text: &str, style: Option<DocxStyle>) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let mut paragraph = Paragraph::new().add_run(Run::new().add_text(text));
|
||||
|
||||
if let Some(style) = style {
|
||||
let mut run = Run::new().add_text(text);
|
||||
|
||||
if let Some(size) = style.font_size {
|
||||
run = run.size(size);
|
||||
}
|
||||
if style.bold == Some(true) {
|
||||
run = run.bold();
|
||||
}
|
||||
if style.italic == Some(true) {
|
||||
run = run.italic();
|
||||
}
|
||||
if style.underline == Some(true) {
|
||||
run = run.underline("single");
|
||||
}
|
||||
if let Some(color) = style.color {
|
||||
run = run.color(color);
|
||||
}
|
||||
|
||||
paragraph = Paragraph::new().add_run(run);
|
||||
|
||||
if let Some(alignment) = style.alignment {
|
||||
paragraph = match alignment.as_str() {
|
||||
"left" => paragraph.align(AlignmentType::Left),
|
||||
"center" => paragraph.align(AlignmentType::Center),
|
||||
"right" => paragraph.align(AlignmentType::Right),
|
||||
"justify" => paragraph.align(AlignmentType::Justified),
|
||||
_ => paragraph,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Added paragraph to document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_heading(&mut self, doc_id: &str, text: &str, level: usize) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let heading_style = match level {
|
||||
1 => "Heading1",
|
||||
2 => "Heading2",
|
||||
3 => "Heading3",
|
||||
4 => "Heading4",
|
||||
5 => "Heading5",
|
||||
6 => "Heading6",
|
||||
_ => "Heading1",
|
||||
};
|
||||
|
||||
let paragraph = Paragraph::new()
|
||||
.add_run(Run::new().add_text(text))
|
||||
.style(heading_style);
|
||||
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Added heading level {} to document {}", level, doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_table(&mut self, doc_id: &str, table_data: TableData) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let col_count = table_data.rows.get(0).map(|r| r.len()).unwrap_or(0);
|
||||
let mut table = Table::new(vec![TableCell::new(); col_count]);
|
||||
|
||||
for row_data in table_data.rows {
|
||||
let mut cells = Vec::new();
|
||||
for cell_text in row_data {
|
||||
let cell = TableCell::new()
|
||||
.add_paragraph(Paragraph::new().add_run(Run::new().add_text(cell_text)));
|
||||
cells.push(cell);
|
||||
}
|
||||
|
||||
while cells.len() < col_count {
|
||||
cells.push(TableCell::new());
|
||||
}
|
||||
|
||||
table = table.add_row(TableRow::new(cells));
|
||||
}
|
||||
|
||||
docx = docx.add_table(table);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Added table to document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_list(&mut self, doc_id: &str, items: Vec<String>, ordered: bool) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let numbering_id = if ordered { 1 } else { 2 };
|
||||
|
||||
for item in items {
|
||||
let paragraph = Paragraph::new()
|
||||
.add_run(Run::new().add_text(item))
|
||||
.numbering(NumberingId::new(numbering_id), IndentLevel::new(0));
|
||||
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
}
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Added {} list to document {}", if ordered { "ordered" } else { "unordered" }, doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add_page_break(&mut self, doc_id: &str) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let paragraph = Paragraph::new().add_run(Run::new().add_break(BreakType::Page));
|
||||
docx = docx.add_paragraph(paragraph);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Added page break to document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_header(&mut self, doc_id: &str, text: &str) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let header = Header::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text(text))
|
||||
);
|
||||
|
||||
docx = docx.header(header);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Set header for document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_footer(&mut self, doc_id: &str, text: &str) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
let mut file = File::open(&metadata.path)?;
|
||||
let mut buffer = Vec::new();
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
let mut docx = Docx::from_reader(&buffer[..])?;
|
||||
|
||||
let footer = Footer::new().add_paragraph(
|
||||
Paragraph::new().add_run(Run::new().add_text(text))
|
||||
);
|
||||
|
||||
docx = docx.footer(footer);
|
||||
|
||||
let file = File::create(&metadata.path)?;
|
||||
docx.build().pack(file)?;
|
||||
|
||||
info!("Set footer for document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn find_and_replace(&mut self, doc_id: &str, find_text: &str, replace_text: &str) -> Result<usize> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
// Note: This is a simplified implementation
|
||||
// Real implementation would need to parse the DOCX XML structure
|
||||
// and perform replacements while preserving formatting
|
||||
|
||||
warn!("Find and replace operation requires advanced XML manipulation");
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
pub fn extract_text(&self, doc_id: &str) -> Result<String> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
// Use pure Rust text extraction
|
||||
use crate::pure_converter::PureRustConverter;
|
||||
let converter = PureRustConverter::new();
|
||||
let text = converter.extract_text_from_docx(&metadata.path)
|
||||
.with_context(|| format!("Failed to extract text from document {}", doc_id))?;
|
||||
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
pub fn get_metadata(&self, doc_id: &str) -> Result<DocxMetadata> {
|
||||
self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))
|
||||
.map(|m| m.clone())
|
||||
}
|
||||
|
||||
pub fn save_document(&self, doc_id: &str, output_path: &Path) -> Result<()> {
|
||||
let metadata = self.documents.get(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
fs::copy(&metadata.path, output_path)
|
||||
.with_context(|| format!("Failed to save document to {:?}", output_path))?;
|
||||
|
||||
info!("Saved document {} to {:?}", doc_id, output_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn close_document(&mut self, doc_id: &str) -> Result<()> {
|
||||
let metadata = self.documents.remove(doc_id)
|
||||
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
|
||||
|
||||
if metadata.path.exists() {
|
||||
fs::remove_file(&metadata.path)?;
|
||||
}
|
||||
|
||||
info!("Closed document {}", doc_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn list_documents(&self) -> Vec<DocxMetadata> {
|
||||
self.documents.values().cloned().collect()
|
||||
}
|
||||
}
|
||||
+1091
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,50 @@
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
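// Font data is embedded only when the `embedded-fonts` feature is enabled AND the build has
// `debug_assertions` off. In every other configuration each item below is an empty slice and
// callers are expected to fall back to the PDF built-in fonts.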
|
||||
|
||||
/// Liberation Sans Regular font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_REGULAR: &[u8] =
    include_bytes!("../assets/fonts/LiberationSans-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_REGULAR: &[u8] = &[];

/// Liberation Sans Bold font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_BOLD: &[u8] =
    include_bytes!("../assets/fonts/LiberationSans-Bold.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_BOLD: &[u8] = &[];

/// Liberation Sans Italic font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_SANS_ITALIC: &[u8] =
    include_bytes!("../assets/fonts/LiberationSans-Italic.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_SANS_ITALIC: &[u8] = &[];

/// Liberation Mono Regular font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub static LIBERATION_MONO_REGULAR: &[u8] =
    include_bytes!("../assets/fonts/LiberationMono-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub static LIBERATION_MONO_REGULAR: &[u8] = &[];

/// Noto Sans Regular font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub const EMBEDDED_FONT_REGULAR: &[u8] =
    include_bytes!("../assets/fonts/NotoSans-Regular.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub const EMBEDDED_FONT_REGULAR: &[u8] = &[];

/// Noto Sans Bold font bytes (empty when fonts are not embedded).
#[cfg(all(feature = "embedded-fonts", not(debug_assertions)))]
pub const EMBEDDED_FONT_BOLD: &[u8] =
    include_bytes!("../assets/fonts/NotoSans-Bold.ttf");
#[cfg(not(all(feature = "embedded-fonts", not(debug_assertions))))]
pub const EMBEDDED_FONT_BOLD: &[u8] = &[];
|
||||
|
||||
/// The four font faces made available to the rest of the crate, as raw
/// font-file bytes. A slice may be empty when fonts are not embedded.
pub struct EmbeddedFonts {
    /// Regular (upright) face.
    pub regular: &'static [u8],
    /// Bold face.
    pub bold: &'static [u8],
    /// Italic face.
    pub italic: &'static [u8],
    /// Monospaced face.
    pub mono: &'static [u8],
}
|
||||
|
||||
pub static FONTS: Lazy<EmbeddedFonts> = Lazy::new(|| {
|
||||
EmbeddedFonts {
|
||||
regular: LIBERATION_SANS_REGULAR,
|
||||
bold: LIBERATION_SANS_BOLD,
|
||||
italic: LIBERATION_SANS_ITALIC,
|
||||
mono: LIBERATION_MONO_REGULAR,
|
||||
}
|
||||
});
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
use anyhow::Result;
|
||||
use mcp_server::{Server, ServerBuilder, ServerOptions};
|
||||
use mcp_core::ToolManager;
|
||||
use tracing::info;
|
||||
use tracing_subscriber::{EnvFilter, fmt, prelude::*};
|
||||
|
||||
mod docx_tools;
|
||||
mod docx_handler;
|
||||
mod converter;
|
||||
mod pure_converter;
|
||||
mod advanced_docx;
|
||||
mod security;
|
||||
|
||||
#[cfg(feature = "embedded-fonts")]
|
||||
mod fonts;
|
||||
|
||||
use docx_tools::DocxToolsProvider;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::registry()
|
||||
.with(fmt::layer())
|
||||
.with(EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
// Load security configuration from environment
|
||||
let security_config = security::SecurityConfig::from_env();
|
||||
info!("Starting DOCX MCP Server - Security: {}", security_config.get_summary());
|
||||
|
||||
let docx_provider = DocxToolsProvider::new_with_security(security_config);
|
||||
|
||||
let options = ServerOptions::default()
|
||||
.with_name("docx-mcp-server")
|
||||
.with_version("0.1.0");
|
||||
|
||||
let server = ServerBuilder::new(options)
|
||||
.with_tool_provider(docx_provider)
|
||||
.build();
|
||||
|
||||
server.run().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,423 @@
|
||||
use anyhow::{Context, Result};
|
||||
use image::{DynamicImage, ImageFormat, Rgba, RgbaImage};
|
||||
use printpdf::*;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufReader, BufWriter, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tempfile::NamedTempFile;
|
||||
use tracing::{debug, info, warn};
|
||||
use roxmltree;
|
||||
use zip::ZipArchive;
|
||||
use rusttype::{Font, Scale};
|
||||
use lopdf;
|
||||
|
||||
/// Stateless converter whose methods work without invoking external programs.
pub struct PureRustConverter;
|
||||
|
||||
impl PureRustConverter {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
/// Extract text from DOCX using pure Rust XML parsing
|
||||
pub fn extract_text_from_docx(&self, docx_path: &Path) -> Result<String> {
|
||||
let file = File::open(docx_path)?;
|
||||
let mut archive = ZipArchive::new(file)?;
|
||||
|
||||
// Find the main document XML
|
||||
let mut document_xml = String::new();
|
||||
|
||||
for i in 0..archive.len() {
|
||||
let mut file = archive.by_index(i)?;
|
||||
let name = file.name().to_string();
|
||||
|
||||
if name == "word/document.xml" {
|
||||
file.read_to_string(&mut document_xml)?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if document_xml.is_empty() {
|
||||
anyhow::bail!("No document.xml found in DOCX file");
|
||||
}
|
||||
|
||||
// Parse XML and extract text
|
||||
let doc = roxmltree::Document::parse(&document_xml)?;
|
||||
let mut text = String::new();
|
||||
|
||||
// Extract text from all w:t elements
|
||||
for node in doc.descendants() {
|
||||
if node.tag_name().name() == "t" {
|
||||
if let Some(node_text) = node.text() {
|
||||
text.push_str(node_text);
|
||||
text.push(' ');
|
||||
}
|
||||
}
|
||||
// Handle line breaks
|
||||
if node.tag_name().name() == "br" || node.tag_name().name() == "p" {
|
||||
text.push('\n');
|
||||
}
|
||||
}
|
||||
|
||||
Ok(text.trim().to_string())
|
||||
}
|
||||
|
||||
/// Convert DOCX to PDF using pure Rust (no external dependencies)
|
||||
pub fn docx_to_pdf_pure(&self, docx_path: &Path, pdf_path: &Path) -> Result<()> {
|
||||
// Extract text from DOCX
|
||||
let text = self.extract_text_from_docx(docx_path)
|
||||
.with_context(|| format!("Failed to extract text from {:?}", docx_path))?;
|
||||
|
||||
// Create PDF with extracted text
|
||||
self.create_pdf_from_text(&text, pdf_path)?;
|
||||
|
||||
info!("Successfully converted DOCX to PDF using pure Rust");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a PDF from text content
|
||||
pub fn create_pdf_from_text(&self, text: &str, pdf_path: &Path) -> Result<()> {
|
||||
let (doc, page1, layer1) = PdfDocument::new("Document", Mm(210.0), Mm(297.0), "Layer 1");
|
||||
let current_layer = doc.get_page(page1).get_layer(layer1);
|
||||
|
||||
// Use embedded font or built-in font
|
||||
let font = doc.add_builtin_font(BuiltinFont::Helvetica)?;
|
||||
|
||||
// Configure text layout
|
||||
let font_size = 11.0;
|
||||
let line_height = Mm(5.0);
|
||||
let margin_left = Mm(20.0);
|
||||
let margin_top = Mm(280.0);
|
||||
let margin_bottom = Mm(20.0);
|
||||
let page_width = Mm(210.0);
|
||||
let page_height = Mm(297.0);
|
||||
let text_width = page_width - (margin_left * 2.0);
|
||||
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let mut current_page = page1;
|
||||
let mut current_layer = layer1;
|
||||
let mut y_position = margin_top;
|
||||
|
||||
for line in lines {
|
||||
// Check if we need a new page
|
||||
if y_position < margin_bottom {
|
||||
let (new_page, new_layer) = doc.add_page(Mm(210.0), Mm(297.0), "Page layer");
|
||||
current_page = new_page;
|
||||
current_layer = new_layer;
|
||||
y_position = margin_top;
|
||||
}
|
||||
|
||||
// Word wrap if line is too long
|
||||
let words: Vec<&str> = line.split_whitespace().collect();
|
||||
let mut current_line = String::new();
|
||||
let max_chars_per_line = 80; // Approximate
|
||||
|
||||
for word in words {
|
||||
if current_line.len() + word.len() + 1 > max_chars_per_line {
|
||||
// Write current line
|
||||
if !current_line.is_empty() {
|
||||
doc.get_page(current_page)
|
||||
.get_layer(current_layer)
|
||||
.use_text(¤t_line, font_size, margin_left, y_position, &font);
|
||||
y_position -= line_height;
|
||||
current_line.clear();
|
||||
|
||||
// Check for new page
|
||||
if y_position < margin_bottom {
|
||||
let (new_page, new_layer) = doc.add_page(Mm(210.0), Mm(297.0), "Page layer");
|
||||
current_page = new_page;
|
||||
current_layer = new_layer;
|
||||
y_position = margin_top;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !current_line.is_empty() {
|
||||
current_line.push(' ');
|
||||
}
|
||||
current_line.push_str(word);
|
||||
}
|
||||
|
||||
// Write remaining text in line
|
||||
if !current_line.is_empty() {
|
||||
doc.get_page(current_page)
|
||||
.get_layer(current_layer)
|
||||
.use_text(¤t_line, font_size, margin_left, y_position, &font);
|
||||
y_position -= line_height;
|
||||
}
|
||||
}
|
||||
|
||||
// Save PDF
|
||||
doc.save(&mut BufWriter::new(File::create(pdf_path)?))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convert PDF to images using pure Rust
|
||||
pub fn pdf_to_images_pure(
|
||||
&self,
|
||||
pdf_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
// Parse PDF
|
||||
let doc = lopdf::Document::load(pdf_path)?;
|
||||
let pages = doc.get_pages();
|
||||
|
||||
fs::create_dir_all(output_dir)?;
|
||||
let mut output_paths = Vec::new();
|
||||
|
||||
// For each page, render to image
|
||||
for (page_num, (_page_num, _page_id)) in pages.iter().enumerate() {
|
||||
// Create a blank image for the page
|
||||
// In a real implementation, you would render the PDF content
|
||||
let img = self.render_pdf_page_to_image(&doc, page_num)?;
|
||||
|
||||
// Save image
|
||||
let extension = match format {
|
||||
ImageFormat::Png => "png",
|
||||
ImageFormat::Jpeg => "jpg",
|
||||
_ => "png",
|
||||
};
|
||||
|
||||
let output_path = output_dir.join(format!("page_{:03}.{}", page_num + 1, extension));
|
||||
img.save_with_format(&output_path, format)?;
|
||||
output_paths.push(output_path);
|
||||
}
|
||||
|
||||
Ok(output_paths)
|
||||
}
|
||||
|
||||
/// Render a PDF page to image (simplified implementation)
|
||||
fn render_pdf_page_to_image(&self, _doc: &lopdf::Document, _page_num: usize) -> Result<DynamicImage> {
|
||||
// This is a simplified implementation
|
||||
// A full implementation would parse PDF content and render it
|
||||
|
||||
// Create a white image as placeholder
|
||||
let width = 1240; // A4 at 150 DPI
|
||||
let height = 1754; // A4 at 150 DPI
|
||||
|
||||
let mut img = RgbaImage::new(width, height);
|
||||
|
||||
// Fill with white background
|
||||
for pixel in img.pixels_mut() {
|
||||
*pixel = Rgba([255, 255, 255, 255]);
|
||||
}
|
||||
|
||||
// Add a simple text indicator
|
||||
// In production, you would properly render PDF content
|
||||
|
||||
Ok(DynamicImage::ImageRgba8(img))
|
||||
}
|
||||
|
||||
/// Convert DOCX to images using pure Rust
|
||||
pub fn docx_to_images_pure(
|
||||
&self,
|
||||
docx_path: &Path,
|
||||
output_dir: &Path,
|
||||
format: ImageFormat,
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
// First convert to PDF
|
||||
let temp_pdf = NamedTempFile::new()?.into_temp_path();
|
||||
self.docx_to_pdf_pure(docx_path, &temp_pdf)?;
|
||||
|
||||
// Then convert PDF to images
|
||||
self.pdf_to_images_pure(&temp_pdf, output_dir, format)
|
||||
}
|
||||
|
||||
/// Create a thumbnail from an image
|
||||
pub fn create_thumbnail(
|
||||
&self,
|
||||
image_path: &Path,
|
||||
output_path: &Path,
|
||||
width: u32,
|
||||
height: u32,
|
||||
) -> Result<()> {
|
||||
let img = image::open(image_path)
|
||||
.with_context(|| format!("Failed to open image {:?}", image_path))?;
|
||||
|
||||
let thumbnail = img.thumbnail(width, height);
|
||||
thumbnail.save(output_path)
|
||||
.with_context(|| format!("Failed to save thumbnail to {:?}", output_path))?;
|
||||
|
||||
info!("Created thumbnail {}x{} at {:?}", width, height, output_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Merge multiple PDFs using pure Rust
|
||||
pub fn merge_pdfs_pure(&self, pdf_paths: &[PathBuf], output_path: &Path) -> Result<()> {
|
||||
use lopdf::{Document, Object, ObjectId};
|
||||
|
||||
// Create a new document for merging
|
||||
let mut merged_doc = Document::with_version("1.5");
|
||||
|
||||
// Track page tree
|
||||
let mut all_pages = Vec::new();
|
||||
|
||||
for pdf_path in pdf_paths {
|
||||
let doc = Document::load(pdf_path)?;
|
||||
|
||||
// Get pages from the document
|
||||
let pages = doc.get_pages();
|
||||
|
||||
for (_page_num, page_id) in pages.iter() {
|
||||
// Clone the page object
|
||||
if let Ok(page_obj) = doc.get_object(*page_id) {
|
||||
let new_id = merged_doc.new_object_id();
|
||||
merged_doc.objects.insert(new_id, page_obj.clone());
|
||||
all_pages.push(new_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build the page tree for merged document
|
||||
let pages_id = merged_doc.new_object_id();
|
||||
let pages_dict = lopdf::dictionary! {
|
||||
"Type" => "Pages",
|
||||
"Kids" => all_pages.iter().map(|id| Object::Reference(*id)).collect::<Vec<_>>(),
|
||||
"Count" => all_pages.len() as i32,
|
||||
};
|
||||
merged_doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
|
||||
|
||||
// Update catalog
|
||||
let catalog_id = merged_doc.new_object_id();
|
||||
let catalog = lopdf::dictionary! {
|
||||
"Type" => "Catalog",
|
||||
"Pages" => Object::Reference(pages_id),
|
||||
};
|
||||
merged_doc.objects.insert(catalog_id, Object::Dictionary(catalog));
|
||||
merged_doc.trailer.set("Root", Object::Reference(catalog_id));
|
||||
|
||||
// Save the merged PDF
|
||||
merged_doc.save(output_path)?;
|
||||
|
||||
info!("Successfully merged {} PDFs into {:?}", pdf_paths.len(), output_path);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Split a PDF into individual pages using pure Rust
|
||||
pub fn split_pdf_pure(&self, pdf_path: &Path, output_dir: &Path) -> Result<Vec<PathBuf>> {
|
||||
use lopdf::Document;
|
||||
|
||||
fs::create_dir_all(output_dir)?;
|
||||
|
||||
let doc = Document::load(pdf_path)?;
|
||||
let pages = doc.get_pages();
|
||||
let mut output_paths = Vec::new();
|
||||
|
||||
for (i, (_page_num, page_id)) in pages.iter().enumerate() {
|
||||
// Create a new document with just this page
|
||||
let mut single_page_doc = Document::with_version("1.5");
|
||||
|
||||
// Clone the page
|
||||
if let Ok(page_obj) = doc.get_object(*page_id) {
|
||||
let new_page_id = single_page_doc.new_object_id();
|
||||
single_page_doc.objects.insert(new_page_id, page_obj.clone());
|
||||
|
||||
// Create page tree
|
||||
let pages_id = single_page_doc.new_object_id();
|
||||
let pages_dict = lopdf::dictionary! {
|
||||
"Type" => "Pages",
|
||||
"Kids" => vec![Object::Reference(new_page_id)],
|
||||
"Count" => 1,
|
||||
};
|
||||
single_page_doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
|
||||
|
||||
// Create catalog
|
||||
let catalog_id = single_page_doc.new_object_id();
|
||||
let catalog = lopdf::dictionary! {
|
||||
"Type" => "Catalog",
|
||||
"Pages" => Object::Reference(pages_id),
|
||||
};
|
||||
single_page_doc.objects.insert(catalog_id, Object::Dictionary(catalog));
|
||||
single_page_doc.trailer.set("Root", Object::Reference(catalog_id));
|
||||
|
||||
// Save the page
|
||||
let output_path = output_dir.join(format!("page_{:03}.pdf", i + 1));
|
||||
single_page_doc.save(&output_path)?;
|
||||
output_paths.push(output_path);
|
||||
}
|
||||
}
|
||||
|
||||
info!("Split PDF into {} pages", output_paths.len());
|
||||
Ok(output_paths)
|
||||
}
|
||||
|
||||
/// Parse and render markdown to PDF
|
||||
pub fn markdown_to_pdf(&self, markdown: &str, pdf_path: &Path) -> Result<()> {
|
||||
use pulldown_cmark::{Parser, Event, Tag, TagEnd};
|
||||
|
||||
let parser = Parser::new(markdown);
|
||||
let mut plain_text = String::new();
|
||||
let mut in_code_block = false;
|
||||
let mut list_depth = 0;
|
||||
|
||||
for event in parser {
|
||||
match event {
|
||||
Event::Text(text) => {
|
||||
if in_code_block {
|
||||
plain_text.push_str(" ");
|
||||
} else if list_depth > 0 {
|
||||
plain_text.push_str(&" ".repeat(list_depth));
|
||||
}
|
||||
plain_text.push_str(&text);
|
||||
}
|
||||
Event::Start(tag) => {
|
||||
match tag {
|
||||
Tag::Heading { level, .. } => {
|
||||
plain_text.push('\n');
|
||||
plain_text.push_str(&"#".repeat(level as usize));
|
||||
plain_text.push(' ');
|
||||
}
|
||||
Tag::Paragraph => {
|
||||
if !plain_text.is_empty() {
|
||||
plain_text.push_str("\n\n");
|
||||
}
|
||||
}
|
||||
Tag::List(_) => {
|
||||
list_depth += 1;
|
||||
plain_text.push('\n');
|
||||
}
|
||||
Tag::Item => {
|
||||
plain_text.push_str("• ");
|
||||
}
|
||||
Tag::CodeBlock(_) => {
|
||||
in_code_block = true;
|
||||
plain_text.push_str("\n\n");
|
||||
}
|
||||
Tag::Emphasis => plain_text.push('*'),
|
||||
Tag::Strong => plain_text.push_str("**"),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Event::End(tag) => {
|
||||
match tag {
|
||||
TagEnd::Heading(_) => plain_text.push_str("\n\n"),
|
||||
TagEnd::Paragraph => plain_text.push('\n'),
|
||||
TagEnd::List(_) => {
|
||||
list_depth = list_depth.saturating_sub(1);
|
||||
plain_text.push('\n');
|
||||
}
|
||||
TagEnd::Item => plain_text.push('\n'),
|
||||
TagEnd::CodeBlock => {
|
||||
in_code_block = false;
|
||||
plain_text.push_str("\n\n");
|
||||
}
|
||||
TagEnd::Emphasis => plain_text.push('*'),
|
||||
TagEnd::Strong => plain_text.push_str("**"),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Event::Code(code) => {
|
||||
plain_text.push('`');
|
||||
plain_text.push_str(&code);
|
||||
plain_text.push('`');
|
||||
}
|
||||
Event::SoftBreak => plain_text.push(' '),
|
||||
Event::HardBreak => plain_text.push('\n'),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
self.create_pdf_from_text(&plain_text, pdf_path)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
+397
@@ -0,0 +1,397 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::env;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
/// Security configuration for the MCP server
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SecurityConfig {
|
||||
/// If true, only allow read-only operations
|
||||
pub readonly_mode: bool,
|
||||
|
||||
/// Whitelist of allowed commands (if set, only these commands are allowed)
|
||||
pub command_whitelist: Option<HashSet<String>>,
|
||||
|
||||
/// Blacklist of forbidden commands (if set, these commands are blocked)
|
||||
pub command_blacklist: Option<HashSet<String>>,
|
||||
|
||||
/// Maximum document size in bytes (default: 100MB)
|
||||
pub max_document_size: usize,
|
||||
|
||||
/// Maximum number of open documents (default: 50)
|
||||
pub max_open_documents: usize,
|
||||
|
||||
/// Allow external tool usage (LibreOffice, etc.)
|
||||
pub allow_external_tools: bool,
|
||||
|
||||
/// Allow network operations (downloading templates, fonts, etc.)
|
||||
pub allow_network: bool,
|
||||
|
||||
/// Sandbox mode - restricts file operations to temp directory only
|
||||
pub sandbox_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for SecurityConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
readonly_mode: false,
|
||||
command_whitelist: None,
|
||||
command_blacklist: None,
|
||||
max_document_size: 100 * 1024 * 1024, // 100MB
|
||||
max_open_documents: 50,
|
||||
allow_external_tools: true,
|
||||
allow_network: true,
|
||||
sandbox_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SecurityConfig {
|
||||
/// Load configuration from environment variables
|
||||
pub fn from_env() -> Self {
|
||||
let mut config = Self::default();
|
||||
|
||||
// Check for readonly mode
|
||||
if env::var("DOCX_MCP_READONLY").unwrap_or_default() == "true" {
|
||||
config.readonly_mode = true;
|
||||
info!("Running in READONLY mode - only viewing operations allowed");
|
||||
}
|
||||
|
||||
// Check for command whitelist
|
||||
if let Ok(whitelist) = env::var("DOCX_MCP_WHITELIST") {
|
||||
let commands: HashSet<String> = whitelist
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect();
|
||||
config.command_whitelist = Some(commands.clone());
|
||||
info!("Command whitelist enabled with {} commands", commands.len());
|
||||
}
|
||||
|
||||
// Check for command blacklist
|
||||
if let Ok(blacklist) = env::var("DOCX_MCP_BLACKLIST") {
|
||||
let commands: HashSet<String> = blacklist
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_string())
|
||||
.collect();
|
||||
config.command_blacklist = Some(commands.clone());
|
||||
info!("Command blacklist enabled with {} blocked commands", commands.len());
|
||||
}
|
||||
|
||||
// Check for sandbox mode
|
||||
if env::var("DOCX_MCP_SANDBOX").unwrap_or_default() == "true" {
|
||||
config.sandbox_mode = true;
|
||||
config.allow_external_tools = false;
|
||||
config.allow_network = false;
|
||||
info!("Running in SANDBOX mode - restricted file operations");
|
||||
}
|
||||
|
||||
// Check for external tools permission
|
||||
if env::var("DOCX_MCP_NO_EXTERNAL_TOOLS").unwrap_or_default() == "true" {
|
||||
config.allow_external_tools = false;
|
||||
info!("External tools disabled");
|
||||
}
|
||||
|
||||
// Check for network permission
|
||||
if env::var("DOCX_MCP_NO_NETWORK").unwrap_or_default() == "true" {
|
||||
config.allow_network = false;
|
||||
info!("Network operations disabled");
|
||||
}
|
||||
|
||||
// Max document size
|
||||
if let Ok(size) = env::var("DOCX_MCP_MAX_SIZE") {
|
||||
if let Ok(bytes) = size.parse::<usize>() {
|
||||
config.max_document_size = bytes;
|
||||
info!("Max document size set to {} bytes", bytes);
|
||||
}
|
||||
}
|
||||
|
||||
// Max open documents
|
||||
if let Ok(max) = env::var("DOCX_MCP_MAX_DOCS") {
|
||||
if let Ok(count) = max.parse::<usize>() {
|
||||
config.max_open_documents = count;
|
||||
info!("Max open documents set to {}", count);
|
||||
}
|
||||
}
|
||||
|
||||
config
|
||||
}
|
||||
|
||||
/// Check if a command is allowed based on security configuration
|
||||
pub fn is_command_allowed(&self, command: &str) -> bool {
|
||||
// First check if it's a readonly command
|
||||
let readonly_commands = Self::get_readonly_commands();
|
||||
let is_readonly_command = readonly_commands.contains(command);
|
||||
|
||||
// In readonly mode, only allow readonly commands
|
||||
if self.readonly_mode && !is_readonly_command {
|
||||
debug!("Command '{}' blocked: readonly mode", command);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check whitelist (if set, only whitelisted commands are allowed)
|
||||
if let Some(ref whitelist) = self.command_whitelist {
|
||||
if !whitelist.contains(command) {
|
||||
debug!("Command '{}' blocked: not in whitelist", command);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check blacklist (if set, blacklisted commands are blocked)
|
||||
if let Some(ref blacklist) = self.command_blacklist {
|
||||
if blacklist.contains(command) {
|
||||
debug!("Command '{}' blocked: in blacklist", command);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Additional checks for specific command categories
|
||||
if command.starts_with("convert_") && !self.allow_external_tools {
|
||||
debug!("Command '{}' blocked: external tools disabled", command);
|
||||
return false;
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Names of the commands treated as read-only.
///
/// These are the only commands `is_command_allowed` accepts while read-only
/// mode is active. A fresh set is built on every call.
pub fn get_readonly_commands() -> HashSet<&'static str> {
    const READONLY_COMMANDS: &[&str] = &[
        // Document viewing commands
        "open_document",
        "extract_text",
        "get_metadata",
        "list_documents",
        "get_document_info",
        "read_paragraph",
        "read_table",
        "read_section",
        "search_text",
        "get_document_structure",
        "get_styles",
        "get_headers_footers",
        "get_page_count",
        "get_word_count",
        "get_table_of_contents",
        "list_bookmarks",
        "list_hyperlinks",
        "list_comments",
        "list_footnotes",
        "list_endnotes",
        "get_document_properties",
        // Analysis commands
        "analyze_formatting",
        "check_spelling",
        "check_grammar",
        "get_statistics",
        "compare_documents",
        // Export commands (readonly as they don't modify the original)
        "export_to_json",
        "export_to_markdown",
        "export_to_html",
        "create_preview",
    ];

    READONLY_COMMANDS.iter().copied().collect()
}
|
||||
|
||||
/// Names of the commands that create or modify documents (for
/// documentation). A fresh set is built on every call.
pub fn get_write_commands() -> HashSet<&'static str> {
    const WRITE_COMMANDS: &[&str] = &[
        // Document creation/modification
        "create_document",
        "save_document",
        "close_document",
        // Content addition
        "add_paragraph",
        "add_heading",
        "add_table",
        "add_list",
        "add_page_break",
        "add_section_break",
        "add_image",
        "add_chart",
        "add_shape",
        "add_hyperlink",
        "add_bookmark",
        "add_footnote",
        "add_endnote",
        "add_comment",
        "add_watermark",
        // Content modification
        "edit_paragraph",
        "delete_paragraph",
        "find_and_replace",
        "update_table",
        "update_style",
        "set_header",
        "set_footer",
        "set_margins",
        "set_page_size",
        "apply_template",
        "apply_style",
        "apply_theme",
        // Document operations
        "merge_documents",
        "split_document",
        "convert_to_pdf",
        "convert_to_images",
        "protect_document",
        "unprotect_document",
        "track_changes",
        "accept_changes",
        "reject_changes",
    ];

    WRITE_COMMANDS.iter().copied().collect()
}
|
||||
|
||||
/// Check if a file path is allowed based on sandbox configuration
|
||||
pub fn is_path_allowed(&self, path: &std::path::Path) -> bool {
|
||||
if !self.sandbox_mode {
|
||||
return true;
|
||||
}
|
||||
|
||||
// In sandbox mode, only allow operations in temp directory
|
||||
let temp_dir = std::env::temp_dir();
|
||||
if let Ok(canonical_path) = path.canonicalize() {
|
||||
if let Ok(canonical_temp) = temp_dir.canonicalize() {
|
||||
return canonical_path.starts_with(canonical_temp);
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// Get a summary of current security settings
|
||||
pub fn get_summary(&self) -> String {
|
||||
let mut summary = Vec::new();
|
||||
|
||||
if self.readonly_mode {
|
||||
summary.push("📖 READONLY MODE");
|
||||
}
|
||||
|
||||
if self.sandbox_mode {
|
||||
summary.push("🔒 SANDBOX MODE");
|
||||
}
|
||||
|
||||
if let Some(ref whitelist) = self.command_whitelist {
|
||||
summary.push(&format!("✅ Whitelist: {} commands", whitelist.len()));
|
||||
}
|
||||
|
||||
if let Some(ref blacklist) = self.command_blacklist {
|
||||
summary.push(&format!("🚫 Blacklist: {} commands", blacklist.len()));
|
||||
}
|
||||
|
||||
if !self.allow_external_tools {
|
||||
summary.push("🔧 No external tools");
|
||||
}
|
||||
|
||||
if !self.allow_network {
|
||||
summary.push("🌐 No network access");
|
||||
}
|
||||
|
||||
if summary.is_empty() {
|
||||
"Standard mode (all features enabled)".to_string()
|
||||
} else {
|
||||
summary.join(" | ")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Security middleware to check commands before execution
|
||||
pub struct SecurityMiddleware {
|
||||
config: SecurityConfig,
|
||||
}
|
||||
|
||||
impl SecurityMiddleware {
|
||||
pub fn new(config: SecurityConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Check if a command should be allowed to execute
|
||||
pub fn check_command(&self, command: &str, arguments: &serde_json::Value) -> Result<(), SecurityError> {
|
||||
// Check if command is allowed
|
||||
if !self.config.is_command_allowed(command) {
|
||||
return Err(SecurityError::CommandNotAllowed(command.to_string()));
|
||||
}
|
||||
|
||||
// Check file paths in arguments if in sandbox mode
|
||||
if self.config.sandbox_mode {
|
||||
self.check_paths_in_arguments(arguments)?;
|
||||
}
|
||||
|
||||
// Check document size limits for open/create operations
|
||||
if command == "open_document" {
|
||||
if let Some(path) = arguments.get("path").and_then(|v| v.as_str()) {
|
||||
self.check_file_size(path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_paths_in_arguments(&self, arguments: &serde_json::Value) -> Result<(), SecurityError> {
|
||||
// Recursively check all string values that look like paths
|
||||
match arguments {
|
||||
serde_json::Value::String(s) => {
|
||||
if s.contains('/') || s.contains('\\') {
|
||||
let path = std::path::Path::new(s);
|
||||
if !self.config.is_path_allowed(path) {
|
||||
return Err(SecurityError::PathNotAllowed(s.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
serde_json::Value::Object(map) => {
|
||||
for value in map.values() {
|
||||
self.check_paths_in_arguments(value)?;
|
||||
}
|
||||
}
|
||||
serde_json::Value::Array(arr) => {
|
||||
for value in arr {
|
||||
self.check_paths_in_arguments(value)?;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_file_size(&self, path: &str) -> Result<(), SecurityError> {
|
||||
let file_path = std::path::Path::new(path);
|
||||
if let Ok(metadata) = std::fs::metadata(file_path) {
|
||||
if metadata.len() as usize > self.config.max_document_size {
|
||||
return Err(SecurityError::FileTooLarge {
|
||||
size: metadata.len() as usize,
|
||||
max_size: self.config.max_document_size,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SecurityError {
|
||||
#[error("Command not allowed: {0}")]
|
||||
CommandNotAllowed(String),
|
||||
|
||||
#[error("Path not allowed in sandbox mode: {0}")]
|
||||
PathNotAllowed(String),
|
||||
|
||||
#[error("File too large: {size} bytes (max: {max_size} bytes)")]
|
||||
FileTooLarge { size: usize, max_size: usize },
|
||||
|
||||
#[error("Maximum number of open documents exceeded")]
|
||||
TooManyDocuments,
|
||||
|
||||
#[error("Operation requires external tools which are disabled")]
|
||||
ExternalToolsDisabled,
|
||||
|
||||
#[error("Operation requires network access which is disabled")]
|
||||
NetworkDisabled,
|
||||
}
|
||||
Reference in New Issue
Block a user