feat(mcp): introduce simple range model and selector-based editing

- RangeId for paragraphs, headings, and table cells
- Tools: get_outline, get_ranges, replace_range_text, set_table_cell_text
- Keeps edits idempotent and precise for AI workflows
This commit is contained in:
Andy
2025-08-13 00:08:04 +08:00
parent 3b05711e92
commit d3fbbcfd7c
2 changed files with 216 additions and 0 deletions
+131
View File
@@ -68,6 +68,14 @@ pub struct DocxHandler {
in_memory_ops: std::collections::HashMap<String, Vec<DocxOp>>, in_memory_ops: std::collections::HashMap<String, Vec<DocxOp>>,
} }
/// Identifier for an addressable piece of document content.
///
/// Serialized by serde as an internally tagged object: the variant name is
/// stored under the `"kind"` key next to the variant's own fields, e.g.
/// `{"kind": "Paragraph", "index": 0}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum RangeId {
/// A paragraph; `index` is zero-based and counts paragraph ops only.
Paragraph { index: usize },
/// A heading; `index` is zero-based and counts heading ops only.
Heading { index: usize },
/// A single table cell; `table_index` is zero-based and counts table ops
/// only, `row` and `col` are zero-based positions inside that table.
TableCell { table_index: usize, row: usize, col: usize },
}
impl DocxHandler { impl DocxHandler {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let base = std::env::var_os("DOCX_MCP_TEMP").map(PathBuf::from).unwrap_or_else(|| std::env::temp_dir()); let base = std::env::var_os("DOCX_MCP_TEMP").map(PathBuf::from).unwrap_or_else(|| std::env::temp_dir());
@@ -609,6 +617,129 @@ impl DocxHandler {
})) }))
} }
/// Build the heading outline of a document.
///
/// Scans the in-memory ops recorded for `doc_id` and produces one JSON entry
/// per heading op, in op order. Each entry carries:
/// - `text`: the heading text,
/// - `level`: the decimal digit that ends the heading's style name, or `1`
///   when the style does not end in a digit,
/// - `range_id`: a [`RangeId::Heading`] whose `index` counts headings only,
///   starting at zero.
///
/// The entries are returned wrapped as `{"outline": [...]}`.
///
/// # Errors
/// Fails when no in-memory ops are recorded for `doc_id`.
pub fn get_outline(&self, doc_id: &str) -> Result<serde_json::Value> {
    let recorded = self
        .in_memory_ops
        .get(doc_id)
        .ok_or_else(|| anyhow::anyhow!("No in-memory ops for document: {}", doc_id))?;

    let entries: Vec<serde_json::Value> = recorded
        .iter()
        // Keep headings only so that `enumerate` numbers them among themselves.
        .filter_map(|op| match op {
            DocxOp::Heading { text, style } => Some((text, style)),
            _ => None,
        })
        .enumerate()
        .map(|(position, (text, style))| {
            let level = match style.chars().last().and_then(|ch| ch.to_digit(10)) {
                Some(digit) => digit as usize,
                None => 1,
            };
            serde_json::json!({
                "text": text,
                "level": level,
                "range_id": RangeId::Heading { index: position }
            })
        })
        .collect();

    Ok(serde_json::json!({"outline": entries}))
}
/// Resolve a simple selector string into the range ids it designates.
///
/// Supported selectors:
/// - `heading:'Text'` — every heading whose text equals `Text` exactly. The
///   part after `heading:` is whitespace-trimmed, then surrounding single
///   quotes and surrounding double quotes are stripped. The returned indices
///   count headings only, starting at zero.
/// - `paragraph[INDEX]` — the paragraph with that index.
/// - `table[T].cell[R,C]` — the cell at row `R`, column `C` of table `T`.
///
/// Whitespace around any of the numbers is ignored. Anything following the
/// closing `]` of a paragraph or cell selector is ignored as well.
///
/// Paragraph and table-cell selectors are only parsed: the returned range is
/// not checked against the document's contents. A selector that is not
/// recognised, or whose numbers do not parse, yields an empty vector rather
/// than an error.
///
/// # Errors
/// Fails when no in-memory ops are recorded for `doc_id`.
pub fn get_ranges(&self, doc_id: &str, selector: &str) -> Result<Vec<RangeId>> {
    let ops = self.in_memory_ops.get(doc_id)
        .ok_or_else(|| anyhow::anyhow!("No in-memory ops for document: {}", doc_id))?;

    // heading:'Text' — may match any number of headings.
    if let Some(rest) = selector.strip_prefix("heading:") {
        let needle = rest.trim().trim_matches('\'').trim_matches('"');
        let matches: Vec<RangeId> = ops
            .iter()
            .filter_map(|op| match op {
                DocxOp::Heading { text, .. } => Some(text),
                _ => None,
            })
            // Enumerate after filtering so the index counts headings only.
            .enumerate()
            .filter(|(_, text)| text.as_str() == needle)
            .map(|(index, _)| RangeId::Heading { index })
            .collect();
        return Ok(matches);
    }

    // paragraph[INDEX]
    let paragraph = selector
        .strip_prefix("paragraph[")
        .and_then(|rest| rest.split_once(']'))
        .and_then(|(index, _)| index.trim().parse::<usize>().ok());
    if let Some(index) = paragraph {
        return Ok(vec![RangeId::Paragraph { index }]);
    }

    // table[T].cell[R,C]
    let cell = selector
        .strip_prefix("table[")
        .and_then(|rest| rest.split_once(']'))
        .and_then(|(table, rest)| {
            let (coords, _) = rest.strip_prefix(".cell[")?.split_once(']')?;
            let mut parts = coords.split(',');
            let table_index = table.trim().parse::<usize>().ok()?;
            let row = parts.next()?.trim().parse::<usize>().ok()?;
            let col = parts.next()?.trim().parse::<usize>().ok()?;
            Some(RangeId::TableCell { table_index, row, col })
        });

    // Zero or one result; an unrecognised selector resolves to nothing.
    Ok(cell.into_iter().collect())
}
/// Replace the text of the paragraph or heading identified by `range`.
///
/// The index inside `range` counts only ops of the matching kind (paragraphs
/// for [`RangeId::Paragraph`], headings for [`RangeId::Heading`]), starting
/// at zero. After a successful replacement `write_docx` is called for the
/// document (presumably to regenerate the file from the ops; see
/// `write_docx`).
///
/// Table cells are not handled here; use `set_table_cell_text` for them.
///
/// # Errors
/// - `ensure_modifiable` rejects the document,
/// - no in-memory ops are recorded for `doc_id`,
/// - `range` is a [`RangeId::TableCell`],
/// - `range` does not address an existing paragraph/heading (nothing is
///   modified or written in that case),
/// - `write_docx` fails.
pub fn replace_range_text(&mut self, doc_id: &str, range: &RangeId, new_text: &str) -> Result<()> {
    self.ensure_modifiable(doc_id)?;
    let ops = self.in_memory_ops.get_mut(doc_id)
        .ok_or_else(|| anyhow::anyhow!("No in-memory ops for document: {}", doc_id))?;

    // Locate the text slot of the n-th op of the requested kind.
    let target = match range {
        RangeId::Paragraph { index } => ops
            .iter_mut()
            .filter_map(|op| match op {
                DocxOp::Paragraph { text, .. } => Some(text),
                _ => None,
            })
            .nth(*index),
        RangeId::Heading { index } => ops
            .iter_mut()
            .filter_map(|op| match op {
                DocxOp::Heading { text, .. } => Some(text),
                _ => None,
            })
            .nth(*index),
        RangeId::TableCell { .. } => anyhow::bail!("Use set_table_cell_text for table cells"),
    };

    match target {
        Some(slot) => *slot = new_text.to_string(),
        // Report a stale or out-of-range id instead of claiming success
        // without having changed anything.
        None => anyhow::bail!("Range not found in document {}: {:?}", doc_id, range),
    }

    self.write_docx(doc_id)?;
    Ok(())
}
/// Overwrite the text of one table cell.
///
/// `table_index` is zero-based and counts table ops only; `row` and `col`
/// are zero-based positions inside that table's rows. After the cell is
/// updated, `write_docx` is called for the document.
///
/// # Errors
/// - `ensure_modifiable` rejects the document,
/// - no in-memory ops are recorded for `doc_id`,
/// - there is no table with index `table_index` ("Table not found"),
/// - `row`/`col` lie outside the table ("Cell out of bounds"),
/// - `write_docx` fails.
pub fn set_table_cell_text(&mut self, doc_id: &str, table_index: usize, row: usize, col: usize, text: &str) -> Result<()> {
    self.ensure_modifiable(doc_id)?;
    let recorded = self.in_memory_ops.get_mut(doc_id)
        .ok_or_else(|| anyhow::anyhow!("No in-memory ops for document: {}", doc_id))?;

    // Pick the requested table, counting table ops only.
    let lookup = recorded
        .iter_mut()
        .filter_map(|op| match op {
            DocxOp::Table { data } => Some(data),
            _ => None,
        })
        .nth(table_index);
    let table = match lookup {
        Some(found) => found,
        None => anyhow::bail!("Table not found"),
    };

    // The row check short-circuits, so the row is only indexed once it is
    // known to exist.
    if row >= table.rows.len() || col >= table.rows[row].len() {
        anyhow::bail!("Cell out of bounds");
    }

    table.rows[row][col] = text.to_string();
    self.write_docx(doc_id)?;
    Ok(())
}
pub fn extract_text(&self, doc_id: &str) -> Result<String> { pub fn extract_text(&self, doc_id: &str) -> Result<String> {
let _metadata = self.documents.get(doc_id) let _metadata = self.documents.get(doc_id)
.ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?; .ok_or_else(|| anyhow::anyhow!("Document not found: {}", doc_id))?;
+85
View File
@@ -734,6 +734,46 @@ impl DocxToolsProvider {
}), }),
annotations: None, annotations: None,
}, },
Tool {
name: "get_outline".to_string(),
description: Some("Return heading outline with range_ids".to_string()),
input_schema: json!({
"type": "object",
"properties": {"document_id": {"type": "string"}},
"required": ["document_id"]
}),
annotations: None,
},
Tool {
name: "get_ranges".to_string(),
description: Some("Resolve a selector to range_ids (heading:'Text', paragraph[i], table[t].cell[r,c])".to_string()),
input_schema: json!({
"type": "object",
"properties": {"document_id": {"type": "string"}, "selector": {"type": "string"}},
"required": ["document_id", "selector"]
}),
annotations: None,
},
Tool {
name: "replace_range_text".to_string(),
description: Some("Replace text in a paragraph/heading by range_id".to_string()),
input_schema: json!({
"type": "object",
"properties": {"document_id": {"type": "string"}, "range_id": {"type": "object"}, "text": {"type": "string"}},
"required": ["document_id", "range_id", "text"]
}),
annotations: None,
},
Tool {
name: "set_table_cell_text".to_string(),
description: Some("Set text in a table cell by indices".to_string()),
input_schema: json!({
"type": "object",
"properties": {"document_id": {"type": "string"}, "table_index": {"type": "integer"}, "row": {"type": "integer"}, "col": {"type": "integer"}, "text": {"type": "string"}},
"required": ["document_id", "table_index", "row", "col", "text"]
}),
annotations: None,
},
Tool { Tool {
name: "get_document_properties".to_string(), name: "get_document_properties".to_string(),
description: Some("Get document properties (title, subject, author, timestamps)".to_string()), description: Some("Get document properties (title, subject, author, timestamps)".to_string()),
@@ -1452,6 +1492,51 @@ impl DocxToolsProvider {
Err(e) => ToolOutcome::Error { code: ErrorCode::DocNotFound, error: e.to_string(), hint: None } Err(e) => ToolOutcome::Error { code: ErrorCode::DocNotFound, error: e.to_string(), hint: None }
} }
}, },
"get_outline" => {
let doc_id = arguments["document_id"].as_str().unwrap_or("");
let handler = self.handler.read().unwrap();
match handler.get_outline(doc_id) {
Ok(outline) => ToolOutcome::Metadata { metadata: outline },
Err(e) => ToolOutcome::Error { code: ErrorCode::DocNotFound, error: e.to_string(), hint: None },
}
},
"get_ranges" => {
let doc_id = arguments["document_id"].as_str().unwrap_or("");
let selector = arguments["selector"].as_str().unwrap_or("");
let handler = self.handler.read().unwrap();
match handler.get_ranges(doc_id, selector) {
Ok(ranges) => ToolOutcome::Metadata { metadata: serde_json::json!({"ranges": ranges}) },
Err(e) => ToolOutcome::Error { code: ErrorCode::DocNotFound, error: e.to_string(), hint: None },
}
},
"replace_range_text" => {
let doc_id = arguments["document_id"].as_str().unwrap_or("");
let range_id = arguments["range_id"].clone();
let text = arguments["text"].as_str().unwrap_or("");
let range: crate::docx_handler::RangeId = match serde_json::from_value(range_id) {
Ok(v) => v,
Err(e) => {
return CallToolResponse { content: vec![ToolResponseContent::Text(TextContent { content_type: "application/json".into(), text: serde_json::json!({"success": false, "code": ErrorCode::ValidationError, "error": format!("invalid range_id: {}", e)}).to_string(), annotations: None })], is_error: Some(true), meta: None };
}
};
let mut handler = self.handler.write().unwrap();
match handler.replace_range_text(doc_id, &range, text) {
Ok(_) => ToolOutcome::Ok { message: Some("Range text replaced".into()) },
Err(e) => ToolOutcome::Error { code: ErrorCode::ValidationError, error: e.to_string(), hint: None },
}
},
"set_table_cell_text" => {
let doc_id = arguments["document_id"].as_str().unwrap_or("");
let ti = arguments["table_index"].as_u64().unwrap_or(0) as usize;
let r = arguments["row"].as_u64().unwrap_or(0) as usize;
let c = arguments["col"].as_u64().unwrap_or(0) as usize;
let text = arguments["text"].as_str().unwrap_or("");
let mut handler = self.handler.write().unwrap();
match handler.set_table_cell_text(doc_id, ti, r, c, text) {
Ok(_) => ToolOutcome::Ok { message: Some("Table cell updated".into()) },
Err(e) => ToolOutcome::Error { code: ErrorCode::ValidationError, error: e.to_string(), hint: None },
}
},
"analyze_formatting" => { "analyze_formatting" => {
let doc_id = arguments["document_id"].as_str().unwrap_or(""); let doc_id = arguments["document_id"].as_str().unwrap_or("");