Concurrency: switch provider handler to RwLock for read-heavy parallelism; add get_storage_info tool; improve text extraction whitespace; keep tests green
This commit is contained in:
@@ -306,6 +306,45 @@ impl DocxHandler {
|
||||
/// Returns the metadata of every document currently held in `self.documents`.
///
/// Each entry is cloned, so the returned vector is independent of the handler's
/// internal collection. Ordering follows the iteration order of `documents.values()`.
pub fn list_documents(&self) -> Vec<DocxMetadata> {
    let mut snapshot: Vec<DocxMetadata> = Vec::new();
    for metadata in self.documents.values() {
        snapshot.push(metadata.clone());
    }
    snapshot
}
|
||||
|
||||
pub fn temp_dir_path(&self) -> PathBuf {
|
||||
self.temp_dir.clone()
|
||||
}
|
||||
|
||||
pub fn get_storage_info(&self) -> Result<serde_json::Value> {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
let mut total_bytes: u64 = 0;
|
||||
let mut file_count: u64 = 0;
|
||||
let mut oldest: Option<u64> = None;
|
||||
let mut newest: Option<u64> = None;
|
||||
if self.temp_dir.exists() {
|
||||
for entry in walkdir::WalkDir::new(&self.temp_dir).into_iter().filter_map(|e| e.ok()) {
|
||||
if entry.file_type().is_file() {
|
||||
file_count += 1;
|
||||
if let Ok(meta) = entry.metadata() {
|
||||
total_bytes = total_bytes.saturating_add(meta.len());
|
||||
if let Ok(modified) = meta.modified() {
|
||||
if let Ok(secs) = modified.duration_since(UNIX_EPOCH) {
|
||||
let ts = secs.as_secs();
|
||||
oldest = Some(oldest.map_or(ts, |o| o.min(ts)));
|
||||
newest = Some(newest.map_or(ts, |n| n.max(ts)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(serde_json::json!({
|
||||
"success": true,
|
||||
"storage": {
|
||||
"base_dir": self.temp_dir,
|
||||
"file_count": file_count,
|
||||
"total_bytes": total_bytes,
|
||||
"oldest_modified": oldest,
|
||||
"newest_modified": newest,
|
||||
}
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
||||
+43
-25
@@ -2,7 +2,7 @@ use mcp_core::types::{Tool, CallToolResponse, ToolResponseContent, TextContent};
|
||||
// Adapt to latest MCP: we'll integrate via mcp-server Router separately
|
||||
use serde_json::{json, Value};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::docx_handler::{DocxHandler, DocxStyle, TableData};
|
||||
@@ -13,7 +13,7 @@ use crate::security::{SecurityConfig, SecurityMiddleware};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DocxToolsProvider {
|
||||
handler: Arc<Mutex<DocxHandler>>,
|
||||
handler: Arc<RwLock<DocxHandler>>,
|
||||
converter: Arc<DocumentConverter>,
|
||||
#[cfg(feature = "advanced-docx")]
|
||||
advanced: Arc<AdvancedDocxHandler>,
|
||||
@@ -28,7 +28,7 @@ impl DocxToolsProvider {
|
||||
|
||||
pub fn new_with_security(security_config: SecurityConfig) -> Self {
|
||||
Self {
|
||||
handler: Arc::new(Mutex::new(DocxHandler::new().expect("Failed to create DocxHandler"))),
|
||||
handler: Arc::new(RwLock::new(DocxHandler::new().expect("Failed to create DocxHandler"))),
|
||||
converter: Arc::new(DocumentConverter::new()),
|
||||
#[cfg(feature = "advanced-docx")]
|
||||
advanced: Arc::new(AdvancedDocxHandler::new()),
|
||||
@@ -45,7 +45,7 @@ impl DocxToolsProvider {
|
||||
/// Create a provider with a base directory and explicit security config
|
||||
pub fn with_base_dir_and_security<P: AsRef<std::path::Path>>(base_dir: P, security_config: SecurityConfig) -> Self {
|
||||
Self {
|
||||
handler: Arc::new(Mutex::new(DocxHandler::new_with_base_dir(base_dir).expect("Failed to create DocxHandler"))),
|
||||
handler: Arc::new(RwLock::new(DocxHandler::new_with_base_dir(base_dir).expect("Failed to create DocxHandler"))),
|
||||
converter: Arc::new(DocumentConverter::new()),
|
||||
#[cfg(feature = "advanced-docx")]
|
||||
advanced: Arc::new(AdvancedDocxHandler::new()),
|
||||
@@ -547,6 +547,16 @@ impl DocxToolsProvider {
|
||||
}),
|
||||
annotations: None,
|
||||
},
|
||||
Tool {
|
||||
name: "get_storage_info".to_string(),
|
||||
description: Some("Get information about temporary storage usage".to_string()),
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": []
|
||||
}),
|
||||
annotations: None,
|
||||
},
|
||||
];
|
||||
|
||||
// Filter tools based on security configuration
|
||||
@@ -576,7 +586,7 @@ impl DocxToolsProvider {
|
||||
|
||||
let result = match name {
|
||||
"create_document" => {
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.create_document() {
|
||||
Ok(doc_id) => json!({
|
||||
"success": true,
|
||||
@@ -592,7 +602,7 @@ impl DocxToolsProvider {
|
||||
|
||||
"open_document" => {
|
||||
let path = arguments["path"].as_str().unwrap_or("");
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.open_document(&PathBuf::from(path)) {
|
||||
Ok(doc_id) => json!({
|
||||
"success": true,
|
||||
@@ -614,7 +624,7 @@ impl DocxToolsProvider {
|
||||
serde_json::from_value::<DocxStyle>(s.clone()).ok()
|
||||
});
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.add_paragraph(doc_id, text, style) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -632,7 +642,7 @@ impl DocxToolsProvider {
|
||||
let text = arguments["text"].as_str().unwrap_or("");
|
||||
let level = arguments["level"].as_u64().unwrap_or(1) as usize;
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.add_heading(doc_id, text, level) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -679,7 +689,7 @@ impl DocxToolsProvider {
|
||||
border_style,
|
||||
};
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.add_table(doc_id, table_data) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -705,7 +715,7 @@ impl DocxToolsProvider {
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.add_list(doc_id, items, ordered) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -722,7 +732,7 @@ impl DocxToolsProvider {
|
||||
"add_page_break" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.add_page_break(doc_id) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -739,7 +749,7 @@ impl DocxToolsProvider {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
let text = arguments["text"].as_str().unwrap_or("");
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.set_header(doc_id, text) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -756,7 +766,7 @@ impl DocxToolsProvider {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
let text = arguments["text"].as_str().unwrap_or("");
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.set_footer(doc_id, text) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -774,7 +784,7 @@ impl DocxToolsProvider {
|
||||
let find_text = arguments["find_text"].as_str().unwrap_or("");
|
||||
let replace_text = arguments["replace_text"].as_str().unwrap_or("");
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.find_and_replace(doc_id, find_text, replace_text) {
|
||||
Ok(count) => json!({
|
||||
"success": true,
|
||||
@@ -791,7 +801,7 @@ impl DocxToolsProvider {
|
||||
"extract_text" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.extract_text(doc_id) {
|
||||
Ok(text) => json!({
|
||||
"success": true,
|
||||
@@ -807,7 +817,7 @@ impl DocxToolsProvider {
|
||||
"get_metadata" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.get_metadata(doc_id) {
|
||||
Ok(metadata) => json!({
|
||||
"success": true,
|
||||
@@ -824,7 +834,7 @@ impl DocxToolsProvider {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
let output_path = arguments["output_path"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.save_document(doc_id, &PathBuf::from(output_path)) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -840,7 +850,7 @@ impl DocxToolsProvider {
|
||||
"close_document" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let mut handler = self.handler.lock().unwrap();
|
||||
let mut handler = self.handler.write().unwrap();
|
||||
match handler.close_document(doc_id) {
|
||||
Ok(_) => json!({
|
||||
"success": true,
|
||||
@@ -854,7 +864,7 @@ impl DocxToolsProvider {
|
||||
},
|
||||
|
||||
"list_documents" => {
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
let documents = handler.list_documents();
|
||||
json!({
|
||||
"success": true,
|
||||
@@ -866,7 +876,7 @@ impl DocxToolsProvider {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
let output_path = arguments["output_path"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
let metadata = match handler.get_metadata(doc_id) {
|
||||
Ok(m) => m,
|
||||
Err(e) => return CallToolResponse { content: vec![ToolResponseContent::Text(TextContent { content_type: "text".into(), text: e.to_string(), annotations: None })], is_error: Some(true), meta: None },
|
||||
@@ -894,7 +904,7 @@ impl DocxToolsProvider {
|
||||
.and_then(|d| d.as_u64())
|
||||
.unwrap_or(150) as u32;
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
let metadata = match handler.get_metadata(doc_id) {
|
||||
Ok(m) => m,
|
||||
Err(e) => return CallToolResponse { content: vec![ToolResponseContent::Text(TextContent { content_type: "text".into(), text: e.to_string(), annotations: None })], is_error: Some(true), meta: None },
|
||||
@@ -927,7 +937,7 @@ impl DocxToolsProvider {
|
||||
"get_document_structure" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.extract_text(doc_id) {
|
||||
Ok(text) => {
|
||||
// Analyze document structure from text
|
||||
@@ -993,7 +1003,7 @@ impl DocxToolsProvider {
|
||||
"get_word_count" => {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.extract_text(doc_id) {
|
||||
Ok(text) => {
|
||||
let words: Vec<&str> = text.split_whitespace().collect();
|
||||
@@ -1028,7 +1038,7 @@ impl DocxToolsProvider {
|
||||
let case_sensitive = arguments.get("case_sensitive").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
let whole_word = arguments.get("whole_word").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.extract_text(doc_id) {
|
||||
Ok(text) => {
|
||||
let search_text = if case_sensitive { text.clone() } else { text.to_lowercase() };
|
||||
@@ -1071,7 +1081,7 @@ impl DocxToolsProvider {
|
||||
let doc_id = arguments["document_id"].as_str().unwrap_or("");
|
||||
let output_path = arguments["output_path"].as_str().unwrap_or("");
|
||||
|
||||
let handler = self.handler.lock().unwrap();
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.extract_text(doc_id) {
|
||||
Ok(text) => {
|
||||
// Simple conversion to Markdown - in full implementation would preserve formatting
|
||||
@@ -1132,6 +1142,14 @@ impl DocxToolsProvider {
|
||||
})
|
||||
},
|
||||
|
||||
"get_storage_info" => {
|
||||
let handler = self.handler.read().unwrap();
|
||||
match handler.get_storage_info() {
|
||||
Ok(info) => info,
|
||||
Err(e) => json!({"success": false, "error": e.to_string()}),
|
||||
}
|
||||
},
|
||||
|
||||
_ => {
|
||||
json!({
|
||||
"success": false,
|
||||
|
||||
+41
-12
@@ -41,24 +41,53 @@ impl PureRustConverter {
|
||||
anyhow::bail!("No document.xml found in DOCX file");
|
||||
}
|
||||
|
||||
// Parse XML and extract text
|
||||
// Parse XML and extract text with basic whitespace semantics
|
||||
let doc = roxmltree::Document::parse(&document_xml)?;
|
||||
let mut text = String::new();
|
||||
|
||||
// Extract text from all w:t elements
|
||||
let mut last_char: Option<char> = None;
|
||||
|
||||
for node in doc.descendants() {
|
||||
if node.tag_name().name() == "t" {
|
||||
if let Some(node_text) = node.text() {
|
||||
text.push_str(node_text);
|
||||
text.push(' ');
|
||||
let name = node.tag_name().name();
|
||||
match name {
|
||||
// Paragraph boundary
|
||||
"p" => {
|
||||
if !text.ends_with('\n') {
|
||||
text.push('\n');
|
||||
last_char = Some('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle line breaks
|
||||
if node.tag_name().name() == "br" || node.tag_name().name() == "p" {
|
||||
text.push('\n');
|
||||
// Text run
|
||||
"t" => {
|
||||
if let Some(node_text) = node.text() {
|
||||
// Preserve spaces if xml:space="preserve"
|
||||
let preserve = node.attribute(("xml", "space")).map(|v| v == "preserve").unwrap_or(false);
|
||||
let mut content = node_text.to_string();
|
||||
if !preserve {
|
||||
// Replace internal newlines with spaces (runs of spaces are left as-is)
|
||||
content = content.replace('\n', " ");
|
||||
}
|
||||
if !content.is_empty() {
|
||||
// Insert a space if needed between words
|
||||
if let Some(c) = last_char { if !c.is_whitespace() && !content.starts_with([' ', '\n', '\t']) { text.push(' '); } }
|
||||
text.push_str(&content);
|
||||
last_char = content.chars().rev().next();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Line break
|
||||
"br" => {
|
||||
text.push('\n');
|
||||
last_char = Some('\n');
|
||||
}
|
||||
// Tab
|
||||
"tab" => {
|
||||
text.push('\t');
|
||||
last_char = Some('\t');
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Ok(text.trim().to_string())
|
||||
}
|
||||
|
||||
|
||||
@@ -311,6 +311,7 @@ impl SecurityConfig {
|
||||
commands.insert("export_to_html");
|
||||
commands.insert("create_preview");
|
||||
commands.insert("get_security_info");
|
||||
commands.insert("get_storage_info");
|
||||
|
||||
commands
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user