Initial Commit
This commit is contained in:
Vendored
+392
@@ -0,0 +1,392 @@
|
||||
//! Test data generators and utilities
|
||||
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Generates test data for various document types and scenarios
|
||||
pub struct TestDataGenerator;
|
||||
|
||||
impl TestDataGenerator {
|
||||
/// Generate test paragraphs with varying complexity
|
||||
pub fn generate_paragraphs(count: usize, complexity: ParagraphComplexity) -> Vec<String> {
|
||||
let base_sentences = match complexity {
|
||||
ParagraphComplexity::Simple => vec![
|
||||
"This is a simple sentence.",
|
||||
"Another basic statement follows.",
|
||||
"The text remains straightforward.",
|
||||
"No complex structures here.",
|
||||
"Plain language is used throughout.",
|
||||
],
|
||||
ParagraphComplexity::Medium => vec![
|
||||
"This sentence demonstrates moderate complexity with additional clauses and descriptive elements.",
|
||||
"Furthermore, the content includes various punctuation marks, numbers like 123, and technical terms.",
|
||||
"The writing style incorporates both simple and compound sentence structures for variety.",
|
||||
"Additionally, references to specific dates (December 15, 2024) and percentages (85%) are included.",
|
||||
"These paragraphs simulate realistic document content found in business communications.",
|
||||
],
|
||||
ParagraphComplexity::Complex => vec![
|
||||
"This comprehensive sentence exemplifies sophisticated linguistic structures, incorporating multiple subordinate clauses, technical terminology, and complex syntactical arrangements that challenge both human readers and automated processing systems.",
|
||||
"Moreover, the content integrates diverse elements including numerical data (such as 42.7% improvement rates), temporal references (spanning Q3 2024 through Q1 2025), geographical locations (Silicon Valley, New York, London), and industry-specific jargon that reflects real-world document complexity.",
|
||||
"The methodology employed in generating these test paragraphs considers various factors: readability indices, sentence length distribution, vocabulary diversity, and the inclusion of special characters (e.g., àáâãäå, €£¥, ∑∏∫) to ensure comprehensive testing coverage.",
|
||||
"Consequently, these multi-faceted paragraphs serve as effective benchmarks for evaluating system performance under realistic conditions, while simultaneously providing sufficient content variation to identify potential edge cases and optimization opportunities.",
|
||||
],
|
||||
};
|
||||
|
||||
(0..count)
|
||||
.map(|i| {
|
||||
let sentence_count = match complexity {
|
||||
ParagraphComplexity::Simple => 2 + (i % 3),
|
||||
ParagraphComplexity::Medium => 3 + (i % 4),
|
||||
ParagraphComplexity::Complex => 2 + (i % 3),
|
||||
};
|
||||
|
||||
let mut paragraph = String::new();
|
||||
for j in 0..sentence_count {
|
||||
let sentence = &base_sentences[j % base_sentences.len()];
|
||||
if j > 0 {
|
||||
paragraph.push(' ');
|
||||
}
|
||||
paragraph.push_str(sentence);
|
||||
}
|
||||
|
||||
paragraph
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate table data with specified dimensions and content type
|
||||
pub fn generate_table_data(rows: usize, cols: usize, content_type: TableContentType) -> Vec<Vec<String>> {
|
||||
let mut table_data = Vec::new();
|
||||
|
||||
// Generate header row
|
||||
let headers: Vec<String> = (0..cols)
|
||||
.map(|i| match content_type {
|
||||
TableContentType::Generic => format!("Column {}", i + 1),
|
||||
TableContentType::Financial => match i {
|
||||
0 => "Period".to_string(),
|
||||
1 => "Revenue".to_string(),
|
||||
2 => "Expenses".to_string(),
|
||||
3 => "Profit".to_string(),
|
||||
_ => format!("Metric {}", i + 1),
|
||||
},
|
||||
TableContentType::Personnel => match i {
|
||||
0 => "Name".to_string(),
|
||||
1 => "Department".to_string(),
|
||||
2 => "Role".to_string(),
|
||||
3 => "Start Date".to_string(),
|
||||
_ => format!("Field {}", i + 1),
|
||||
},
|
||||
TableContentType::Technical => match i {
|
||||
0 => "Component".to_string(),
|
||||
1 => "Version".to_string(),
|
||||
2 => "Status".to_string(),
|
||||
3 => "Last Updated".to_string(),
|
||||
_ => format!("Attribute {}", i + 1),
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
|
||||
table_data.push(headers);
|
||||
|
||||
// Generate data rows
|
||||
for row in 0..rows {
|
||||
let row_data: Vec<String> = (0..cols)
|
||||
.map(|col| match content_type {
|
||||
TableContentType::Generic => format!("R{}C{}", row + 1, col + 1),
|
||||
TableContentType::Financial => match col {
|
||||
0 => format!("Q{} 2024", (row % 4) + 1),
|
||||
1 => format!("${:.1}M", 100.0 + row as f64 * 12.5),
|
||||
2 => format!("${:.1}M", 70.0 + row as f64 * 8.2),
|
||||
3 => format!("${:.1}M", 30.0 + row as f64 * 4.3),
|
||||
_ => format!("{:.1}%", 15.0 + row as f64 * 2.1),
|
||||
},
|
||||
TableContentType::Personnel => match col {
|
||||
0 => format!("Employee {}", row + 1),
|
||||
1 => ["Engineering", "Sales", "Marketing", "Operations"][(row % 4)].to_string(),
|
||||
2 => ["Manager", "Developer", "Analyst", "Specialist"][(row % 4)].to_string(),
|
||||
3 => format!("2024-{:02}-{:02}", ((row % 12) + 1), ((row % 28) + 1)),
|
||||
_ => format!("Data {}", row + 1),
|
||||
},
|
||||
TableContentType::Technical => match col {
|
||||
0 => format!("Component-{}", row + 1),
|
||||
1 => format!("v{}.{}.{}", (row % 3) + 1, (row % 5), (row % 10)),
|
||||
2 => ["Active", "Pending", "Deprecated", "Testing"][(row % 4)].to_string(),
|
||||
3 => format!("2024-12-{:02}", ((row % 28) + 1)),
|
||||
_ => format!("Value {}", row + 1),
|
||||
},
|
||||
})
|
||||
.collect();
|
||||
|
||||
table_data.push(row_data);
|
||||
}
|
||||
|
||||
table_data
|
||||
}
|
||||
|
||||
/// Generate list items with specified count and category
|
||||
pub fn generate_list_items(count: usize, category: ListCategory) -> Vec<String> {
|
||||
let base_items = match category {
|
||||
ListCategory::Tasks => vec![
|
||||
"Complete project documentation",
|
||||
"Review code changes and pull requests",
|
||||
"Update system configuration files",
|
||||
"Run comprehensive test suite",
|
||||
"Deploy to staging environment",
|
||||
"Conduct security audit",
|
||||
"Optimize database performance",
|
||||
"Update user interface components",
|
||||
"Implement new feature requirements",
|
||||
"Fix reported bugs and issues",
|
||||
],
|
||||
ListCategory::Features => vec![
|
||||
"Advanced search and filtering capabilities",
|
||||
"Real-time collaboration tools",
|
||||
"Automated backup and recovery",
|
||||
"Multi-language support",
|
||||
"Mobile-responsive design",
|
||||
"Integration with third-party services",
|
||||
"Customizable dashboard and reports",
|
||||
"Role-based access control",
|
||||
"API for external integrations",
|
||||
"Advanced analytics and insights",
|
||||
],
|
||||
ListCategory::Requirements => vec![
|
||||
"System must support 1000+ concurrent users",
|
||||
"Response time must be under 200ms for 95% of requests",
|
||||
"Uptime must exceed 99.9% availability",
|
||||
"Data must be encrypted both in transit and at rest",
|
||||
"User interface must be accessible (WCAG 2.1 AA)",
|
||||
"System must support multi-factor authentication",
|
||||
"Backup processes must complete within 2 hours",
|
||||
"Security patches must be applied within 24 hours",
|
||||
"System must scale horizontally to handle peak loads",
|
||||
"Audit logs must be maintained for minimum 7 years",
|
||||
],
|
||||
ListCategory::Benefits => vec![
|
||||
"Increased operational efficiency by 35%",
|
||||
"Reduced manual processing time by 60%",
|
||||
"Improved data accuracy and consistency",
|
||||
"Enhanced security and compliance posture",
|
||||
"Better user experience and satisfaction",
|
||||
"Lower total cost of ownership",
|
||||
"Faster time-to-market for new features",
|
||||
"Improved scalability and performance",
|
||||
"Better decision-making through analytics",
|
||||
"Reduced maintenance and support costs",
|
||||
],
|
||||
};
|
||||
|
||||
(0..count)
|
||||
.map(|i| {
|
||||
let base_item = &base_items[i % base_items.len()];
|
||||
if count > base_items.len() {
|
||||
format!("{} (item {})", base_item, i + 1)
|
||||
} else {
|
||||
base_item.clone()
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generate realistic business data for testing
|
||||
pub fn generate_business_data() -> BusinessDataSet {
|
||||
BusinessDataSet {
|
||||
companies: vec![
|
||||
"Acme Corporation".to_string(),
|
||||
"Global Tech Solutions".to_string(),
|
||||
"Innovation Partners LLC".to_string(),
|
||||
"Digital Dynamics Inc".to_string(),
|
||||
"Future Systems Ltd".to_string(),
|
||||
],
|
||||
departments: vec![
|
||||
"Engineering".to_string(),
|
||||
"Sales & Marketing".to_string(),
|
||||
"Human Resources".to_string(),
|
||||
"Operations".to_string(),
|
||||
"Finance & Accounting".to_string(),
|
||||
"Research & Development".to_string(),
|
||||
],
|
||||
positions: vec![
|
||||
"Software Engineer".to_string(),
|
||||
"Product Manager".to_string(),
|
||||
"Sales Representative".to_string(),
|
||||
"Data Analyst".to_string(),
|
||||
"Project Manager".to_string(),
|
||||
"UX Designer".to_string(),
|
||||
],
|
||||
locations: vec![
|
||||
"San Francisco, CA".to_string(),
|
||||
"New York, NY".to_string(),
|
||||
"Austin, TX".to_string(),
|
||||
"Seattle, WA".to_string(),
|
||||
"Boston, MA".to_string(),
|
||||
"Chicago, IL".to_string(),
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate MCP tool call test data
|
||||
pub fn generate_mcp_test_calls() -> Vec<McpTestCall> {
|
||||
vec![
|
||||
McpTestCall {
|
||||
tool_name: "create_document".to_string(),
|
||||
args: json!({}),
|
||||
expected_success: true,
|
||||
expected_result_keys: vec!["success".to_string(), "document_id".to_string()],
|
||||
},
|
||||
McpTestCall {
|
||||
tool_name: "add_paragraph".to_string(),
|
||||
args: json!({
|
||||
"document_id": "test-doc-id",
|
||||
"text": "Test paragraph content"
|
||||
}),
|
||||
expected_success: true,
|
||||
expected_result_keys: vec!["success".to_string()],
|
||||
},
|
||||
McpTestCall {
|
||||
tool_name: "add_heading".to_string(),
|
||||
args: json!({
|
||||
"document_id": "test-doc-id",
|
||||
"text": "Test Heading",
|
||||
"level": 1
|
||||
}),
|
||||
expected_success: true,
|
||||
expected_result_keys: vec!["success".to_string()],
|
||||
},
|
||||
McpTestCall {
|
||||
tool_name: "extract_text".to_string(),
|
||||
args: json!({
|
||||
"document_id": "test-doc-id"
|
||||
}),
|
||||
expected_success: true,
|
||||
expected_result_keys: vec!["success".to_string(), "text".to_string()],
|
||||
},
|
||||
McpTestCall {
|
||||
tool_name: "get_metadata".to_string(),
|
||||
args: json!({
|
||||
"document_id": "test-doc-id"
|
||||
}),
|
||||
expected_success: true,
|
||||
expected_result_keys: vec!["success".to_string(), "metadata".to_string()],
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
/// Generate performance test scenarios
|
||||
pub fn generate_performance_scenarios() -> Vec<PerformanceScenario> {
|
||||
vec![
|
||||
PerformanceScenario {
|
||||
name: "Small Document".to_string(),
|
||||
paragraph_count: 10,
|
||||
table_count: 1,
|
||||
list_count: 2,
|
||||
expected_max_time_ms: 1000,
|
||||
},
|
||||
PerformanceScenario {
|
||||
name: "Medium Document".to_string(),
|
||||
paragraph_count: 100,
|
||||
table_count: 5,
|
||||
list_count: 10,
|
||||
expected_max_time_ms: 5000,
|
||||
},
|
||||
PerformanceScenario {
|
||||
name: "Large Document".to_string(),
|
||||
paragraph_count: 500,
|
||||
table_count: 20,
|
||||
list_count: 30,
|
||||
expected_max_time_ms: 15000,
|
||||
},
|
||||
PerformanceScenario {
|
||||
name: "Extra Large Document".to_string(),
|
||||
paragraph_count: 1000,
|
||||
table_count: 50,
|
||||
list_count: 50,
|
||||
expected_max_time_ms: 30000,
|
||||
},
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
/// Complexity levels for generated paragraphs
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ParagraphComplexity {
|
||||
Simple,
|
||||
Medium,
|
||||
Complex,
|
||||
}
|
||||
|
||||
/// Content types for generated tables
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum TableContentType {
|
||||
Generic,
|
||||
Financial,
|
||||
Personnel,
|
||||
Technical,
|
||||
}
|
||||
|
||||
/// Categories for generated lists
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ListCategory {
|
||||
Tasks,
|
||||
Features,
|
||||
Requirements,
|
||||
Benefits,
|
||||
}
|
||||
|
||||
/// Business data set for realistic testing
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BusinessDataSet {
|
||||
pub companies: Vec<String>,
|
||||
pub departments: Vec<String>,
|
||||
pub positions: Vec<String>,
|
||||
pub locations: Vec<String>,
|
||||
}
|
||||
|
||||
/// MCP tool call test data
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct McpTestCall {
|
||||
pub tool_name: String,
|
||||
pub args: Value,
|
||||
pub expected_success: bool,
|
||||
pub expected_result_keys: Vec<String>,
|
||||
}
|
||||
|
||||
/// Performance test scenario data
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PerformanceScenario {
|
||||
pub name: String,
|
||||
pub paragraph_count: usize,
|
||||
pub table_count: usize,
|
||||
pub list_count: usize,
|
||||
pub expected_max_time_ms: u64,
|
||||
}
|
||||
|
||||
/// Utility functions for test data validation
|
||||
pub struct TestDataValidator;
|
||||
|
||||
impl TestDataValidator {
|
||||
/// Validate that text contains expected content
|
||||
pub fn validate_text_content(text: &str, expected_keywords: &[&str]) -> bool {
|
||||
expected_keywords.iter().all(|keyword| text.contains(keyword))
|
||||
}
|
||||
|
||||
/// Validate table structure
|
||||
pub fn validate_table_structure(rows: &[Vec<String>], expected_cols: usize) -> bool {
|
||||
!rows.is_empty() && rows.iter().all(|row| row.len() == expected_cols)
|
||||
}
|
||||
|
||||
/// Validate MCP response structure
|
||||
pub fn validate_mcp_response(response: &Value, expected_keys: &[String]) -> bool {
|
||||
expected_keys.iter().all(|key| response.get(key).is_some())
|
||||
}
|
||||
|
||||
/// Generate hash for test data consistency checking
|
||||
pub fn generate_content_hash(content: &str) -> u64 {
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
content.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user