// cache.rs — content-addressed verification result cache
use crate::types::*;
use anyhow::{anyhow, Result};
use blake3::Hasher;
use quote::ToTokens;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use syn::{parse_file, Attribute, File, Item, ItemEnum, ItemFn, ItemMod, ItemStruct};

/// Composite cache key: a verification result is reusable only when the
/// normalized target content, the configuration, the tool versions, and the
/// verification layer all match.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct CacheKey {
    // Blake3 hash (hex) of the normalized target content.
    pub content_hash: ContentHash,
    // Hash of the verification configuration, supplied by the caller.
    pub config_hash: ConfigHash,
    // Versions of ferris-proof and discovered external tools.
    pub tool_versions: ToolVersions,
    // The verification layer this result belongs to (project-defined type).
    pub layer: Layer,
}

impl CacheKey {
    /// Create a new cache key for the given target and layer
    pub fn new(
        target: &crate::verification::Target,
        layer: Layer,
        config_hash: &str,
    ) -> Result<Self> {
        let content_hash = Self::compute_content_hash(target)?;
        let tool_versions = Self::get_tool_versions()?;

        Ok(Self {
            content_hash,
            config_hash: ConfigHash(config_hash.to_string()),
            tool_versions,
            layer,
        })
    }

    /// Compute content hash for a verification target
    ///
    /// Rust files are parsed and normalized via `normalize_ast` so that
    /// comment/whitespace-only edits do not invalidate the cache; formal
    /// specs go through the textual normalizer `normalize_spec`.
    fn compute_content_hash(target: &crate::verification::Target) -> Result<ContentHash> {
        let mut hasher = Hasher::new();

        match target {
            crate::verification::Target::RustFile(path) => {
                let ast = parse_file(
                    &std::fs::read_to_string(path)
                        .map_err(|e| anyhow!("Failed to read Rust file {:?}: {}", path, e))?,
                )?;
                let normalized = Self::normalize_ast(&ast);
                hasher.update(normalized.as_bytes());
            }
            crate::verification::Target::FormalSpec(path) => {
                let spec = std::fs::read_to_string(path)
                    .map_err(|e| anyhow!("Failed to read formal spec {:?}: {}", path, e))?;
                let normalized = Self::normalize_spec(&spec);
                hasher.update(normalized.as_bytes());
            }
            crate::verification::Target::Module(module_path) => {
                // For modules, we hash the module path directly
                // TODO: Implement module content hashing
                hasher.update(module_path.as_bytes());
            }
        }

        Ok(ContentHash(hex::encode(hasher.finalize().as_bytes())))
    }

    /// Normalize Rust AST by removing comments, whitespace, and other irrelevant details
    ///
    /// Re-emits the file through `ToTokens`, which already drops comments and
    /// canonicalizes spacing; per-item normalizers additionally strip
    /// attributes that do not affect verification.
    fn normalize_ast(ast: &File) -> String {
        let mut normalized_items = Vec::new();

        for item in &ast.items {
            match item {
                Item::Fn(item_fn) => {
                    // Normalize function signature and body structure
                    let normalized_item = Self::normalize_function(item_fn);
                    normalized_items.push(Item::Fn(normalized_item));
                }
                Item::Struct(item_struct) => {
                    // Normalize struct definition
                    let normalized_item = Self::normalize_struct(item_struct);
                    normalized_items.push(Item::Struct(normalized_item));
                }
                Item::Enum(item_enum) => {
                    // Normalize enum definition
                    let normalized_item = Self::normalize_enum(item_enum);
                    normalized_items.push(Item::Enum(normalized_item));
                }
                Item::Mod(item_mod) => {
                    // Normalize module declaration
                    let normalized_item = Self::normalize_module(item_mod);
                    normalized_items.push(Item::Mod(normalized_item));
                }
                _ => {
                    // Include other items as-is for now
                    normalized_items.push(item.clone());
                }
            }
        }

        // Reconstruct the normalized AST and convert to string
        let normalized_file = File {
            shebang: ast.shebang.clone(),
            attrs: ast.attrs.clone(),
            items: normalized_items,
        };

        normalized_file.to_token_stream().to_string()
    }

    /// Normalize a function item
    fn normalize_function(item_fn: &ItemFn) -> ItemFn {
        // Remove attributes that don't affect verification
        let attrs: Vec<Attribute> = item_fn
            .attrs
            .iter()
            .filter(|attr| {
                // Keep verification attributes, remove others like #[cfg(test)]
                // NOTE(review): this drops *every* `cfg` attribute, not just
                // cfg(test) — confirm that cfg-gated code should never affect
                // the content hash.
                attr.path().is_ident("verification")
                    || !attr
                        .path()
                        .segments
                        .first()
                        .map(|seg| seg.ident == "cfg")
                        .unwrap_or(false)
            })
            .cloned()
            .collect();

        // Normalize function body by removing comments and normalizing whitespace
        let normalized_block = Self::normalize_block(&item_fn.block);

        ItemFn {
            attrs,
            vis: item_fn.vis.clone(),
            sig: item_fn.sig.clone(),
            block: Box::new(normalized_block),
        }
    }

    /// Normalize a block by removing comments and normalizing structure
    ///
    /// Currently a structural copy — each statement kind is cloned as-is;
    /// the arms exist as extension points for deeper normalization.
    fn normalize_block(block: &syn::Block) -> syn::Block {
        use syn::Stmt;

        let mut normalized_stmts = Vec::new();

        for stmt in &block.stmts {
            match stmt {
                Stmt::Local(_local) => {
                    // Keep local variable declarations as-is for now
                    normalized_stmts.push(stmt.clone());
                }
                Stmt::Item(_item) => {
                    // Recursively normalize nested items
                    normalized_stmts.push(stmt.clone());
                }
                Stmt::Expr(expr, semi) => {
                    // Keep expressions but could normalize further
                    normalized_stmts.push(Stmt::Expr(expr.clone(), *semi));
                }
                Stmt::Macro(_mac) => {
                    // Keep macro calls as-is
                    normalized_stmts.push(stmt.clone());
                }
            }
        }

        syn::Block {
            brace_token: block.brace_token,
            stmts: normalized_stmts,
        }
    }

    /// Normalize a struct item
    ///
    /// Unlike `normalize_function`, this keeps *only* `#[verification]`
    /// attributes (derives etc. are stripped from the hash input).
    fn normalize_struct(item_struct: &ItemStruct) -> ItemStruct {
        // Remove attributes that don't affect verification
        let attrs: Vec<Attribute> = item_struct
            .attrs
            .iter()
            .filter(|attr| attr.path().is_ident("verification"))
            .cloned()
            .collect();

        ItemStruct {
            attrs,
            vis: item_struct.vis.clone(),
            struct_token: item_struct.struct_token,
            ident: item_struct.ident.clone(),
            generics: item_struct.generics.clone(),
            fields: item_struct.fields.clone(),
            semi_token: item_struct.semi_token,
        }
    }

    /// Normalize an enum item
    fn normalize_enum(item_enum: &ItemEnum) -> ItemEnum {
        // Remove attributes that don't affect verification
        let attrs: Vec<Attribute> = item_enum
            .attrs
            .iter()
            .filter(|attr| attr.path().is_ident("verification"))
            .cloned()
            .collect();

        ItemEnum {
            attrs,
            vis: item_enum.vis.clone(),
            enum_token: item_enum.enum_token,
            ident: item_enum.ident.clone(),
            generics: item_enum.generics.clone(),
            brace_token: item_enum.brace_token,
            variants: item_enum.variants.clone(),
        }
    }

    /// Normalize a module item
    fn normalize_module(item_mod: &ItemMod) -> ItemMod {
        // Remove attributes that don't affect verification
        let attrs: Vec<Attribute> = item_mod
            .attrs
            .iter()
            .filter(|attr| attr.path().is_ident("verification"))
            .cloned()
            .collect();

        ItemMod {
            attrs,
            vis: item_mod.vis.clone(),
            unsafety: item_mod.unsafety,
            mod_token: item_mod.mod_token,
            ident: item_mod.ident.clone(),
            content: item_mod.content.clone(),
            semi: item_mod.semi,
        }
    }

    /// Normalize formal specification content
    ///
    /// Single-pass character state machine that strips `//` line comments and
    /// `/* */` block comments (string literals are preserved, including
    /// backslash escapes) and collapses all whitespace runs/newlines to a
    /// single space.
    ///
    /// NOTE(review): TLA+ uses `\*` line comments and `(* *)` block comments;
    /// only C-style comment syntax (as used by Alloy) is stripped here —
    /// confirm TLA+ specs are intended to pass through with comments intact.
    fn normalize_spec(spec: &str) -> String {
        // Remove comments and normalize whitespace for TLA+ and Alloy specs
        let mut normalized = String::new();
        let mut chars = spec.chars().peekable();
        let mut in_line_comment = false;
        let mut in_block_comment = false;
        let mut in_string = false;
        let mut escape_next = false;

        while let Some(char) = chars.next() {
            if escape_next {
                // Emit the escaped pair verbatim when inside a string; outside
                // a string the escape flag is simply cleared.
                if in_string {
                    normalized.push('\\');
                    normalized.push(char);
                }
                escape_next = false;
                continue;
            }

            match char {
                '\\' if in_string => {
                    escape_next = true;
                    continue;
                }
                '"' if !in_line_comment && !in_block_comment => {
                    in_string = !in_string;
                    normalized.push(char);
                }
                '/' if !in_string && !in_line_comment && !in_block_comment => {
                    if let Some(&next_char) = chars.peek() {
                        if next_char == '/' {
                            // Start of line comment
                            chars.next(); // consume the second '/'
                            in_line_comment = true;
                            continue;
                        } else if next_char == '*' {
                            // Start of block comment
                            chars.next(); // consume the '*'
                            in_block_comment = true;
                            continue;
                        }
                    }
                    normalized.push(char);
                }
                '*' if in_block_comment && !in_string => {
                    if let Some(&next_char) = chars.peek() {
                        if next_char == '/' {
                            // End of block comment
                            chars.next(); // consume the '/'
                            in_block_comment = false;
                            continue;
                        }
                    }
                }
                '\n' | '\r' => {
                    if in_line_comment {
                        in_line_comment = false;
                    }
                    if !in_block_comment && !in_string {
                        // Normalize line breaks to single space
                        if !normalized.ends_with(' ') && !normalized.is_empty() {
                            normalized.push(' ');
                        }
                    }
                }
                c if c.is_whitespace() => {
                    if !in_line_comment && !in_block_comment && !in_string {
                        // Normalize whitespace to single space
                        if !normalized.ends_with(' ') && !normalized.is_empty() {
                            normalized.push(' ');
                        }
                    }
                }
                c => {
                    if !in_line_comment && !in_block_comment {
                        normalized.push(c);
                    }
                }
            }
        }

        normalized.trim().to_string()
    }

    /// Get current tool versions for cache invalidation
    ///
    /// Tools that cannot be found or whose version cannot be parsed are
    /// silently omitted from `external_tools`.
    fn get_tool_versions() -> Result<ToolVersions> {
        let ferris_proof_version = env!("CARGO_PKG_VERSION").to_string();
        let mut external_tools = Vec::new();

        // Check for common external tools with their version commands
        let tools_to_check = vec![
            ("tlc", "TLA+ TLC", vec!["--version", "-version"]),
            ("java", "Java (for TLA+)", vec!["-version"]),
            ("alloy", "Alloy Analyzer", vec!["--version"]),
            ("kani", "Kani Verifier", vec!["--version"]),
            ("cargo", "Cargo", vec!["--version"]),
            ("rustc", "Rust Compiler", vec!["--version"]),
        ];

        for (tool_name, display_name, version_args) in tools_to_check {
            if let Ok(version) = Self::get_tool_version_with_args(tool_name, &version_args) {
                external_tools.push((display_name.to_string(), version));
            }
        }

        // Add Rust toolchain information
        if let Ok(rustc_version) = Self::get_rustc_commit_hash() {
            external_tools.push(("Rust Commit".to_string(), rustc_version));
        }

        Ok(ToolVersions {
            ferris_proof: ferris_proof_version,
            external_tools,
        })
    }

    /// Get version of an external tool with multiple possible version arguments
    ///
    /// Tries each candidate flag in order and returns the first success.
    fn get_tool_version_with_args(tool_name: &str, version_args: &[&str]) -> Result<String> {
        for &arg in version_args {
            if let Ok(version) = Self::get_tool_version_with_arg(tool_name, arg) {
                return Ok(version);
            }
        }
        Err(anyhow!(
            "Could not determine version for tool: {}",
            tool_name
        ))
    }

    /// Get version of an external tool with a specific argument
    fn get_tool_version_with_arg(tool_name: &str, version_arg: &str) -> Result<String> {
        let output = std::process::Command::new(tool_name)
            .arg(version_arg)
            .output();

        match output {
            Ok(result) => {
                let stdout = String::from_utf8_lossy(&result.stdout);
                let stderr = String::from_utf8_lossy(&result.stderr);

                // Try stdout first, then stderr (some tools output version to stderr)
                let version_text = if !stdout.trim().is_empty() {
                    stdout
                } else {
                    stderr
                };

                // Extract version string (improved parsing)
                let version_line = version_text.lines().next().unwrap_or("");
                let version = Self::extract_version_from_line(version_line);

                // NOTE(review): `extract_version_from_line` returns "unknown"
                // rather than "" when no pattern matches, so this error branch
                // only fires on an empty first line.
                if !version.is_empty() {
                    Ok(version)
                } else {
                    Err(anyhow!(
                        "Could not parse version from output: {}",
                        version_line
                    ))
                }
            }
            Err(e) => Err(anyhow!("Failed to execute tool {}: {}", tool_name, e)),
        }
    }

    /// Extract version string from a line of text
    ///
    /// Tries progressively looser patterns: semver `x.y.z[-pre]`, then `x.y`,
    /// then any dotted digit run; falls back to "unknown".
    fn extract_version_from_line(line: &str) -> String {
        // Look for semantic version patterns (x.y.z)
        if let Ok(version_regex) = Regex::new(r"\b(\d+\.\d+\.\d+(?:-[a-zA-Z0-9.-]+)?)\b") {
            if let Some(captures) = version_regex.captures(line) {
                return captures.get(1).unwrap().as_str().to_string();
            }
        }

        // Look for simpler version patterns (x.y)
        if let Ok(simple_version_regex) = Regex::new(r"\b(\d+\.\d+)\b") {
            if let Some(captures) = simple_version_regex.captures(line) {
                return captures.get(1).unwrap().as_str().to_string();
            }
        }

        // Fallback: look for any sequence of digits and dots
        if let Ok(fallback_regex) = Regex::new(r"\b(\d+(?:\.\d+)*)\b") {
            if let Some(captures) = fallback_regex.captures(line) {
                return captures.get(1).unwrap().as_str().to_string();
            }
        }

        "unknown".to_string()
    }

    /// Get Rust compiler commit hash for more precise cache invalidation
    fn get_rustc_commit_hash() -> Result<String> {
        let output = std::process::Command::new("rustc")
            .arg("--version")
            .arg("--verbose")
            .output();

        match output {
            Ok(result) if result.status.success() => {
                let stdout = String::from_utf8_lossy(&result.stdout);
                for line in stdout.lines() {
                    if line.starts_with("commit-hash:") {
                        return Ok(line
                            .split(':')
                            .nth(1)
                            .unwrap_or("unknown")
                            .trim()
                            .to_string());
                    }
                }
                Err(anyhow!("Could not find commit hash in rustc output"))
            }
            Ok(result) => Err(anyhow!(
                "rustc returned non-zero exit code: {}",
                result.status
            )),
            Err(e) => Err(anyhow!("Failed to execute rustc: {}", e)),
        }
    }
}

/// Hex-encoded Blake3 hash of normalized target content.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct ContentHash(pub String);

/// Caller-supplied hash of the verification configuration.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct ConfigHash(pub String);

/// Versions of ferris-proof and any discovered external tools.
#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
pub struct ToolVersions {
    pub ferris_proof: String,
    pub external_tools: Vec<(String, String)>, // Use Vec instead of HashMap for Hash trait
}

/// In-memory verification cache with optional write-through persistence.
pub struct VerificationCache {
    cache_dir: PathBuf,
    entries: HashMap<CacheKey, CacheEntry>,
    persistent_storage: Option<PersistentStorage>,
}

/// A cached verification result plus expiry/bookkeeping metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    pub result: LayerResult,
    // Creation time; entry expires once `now - timestamp >= ttl`.
    pub timestamp: chrono::DateTime<chrono::Utc>,
    pub ttl: std::time::Duration,
    pub metadata: CacheMetadata,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheMetadata {
    pub file_size: u64,
    pub execution_time: std::time::Duration,
    pub memory_usage: u64,
    pub cache_hit_count: u32,
}

/// Disk-backed store of cache entries under a content-addressed layout.
pub struct PersistentStorage {
    cache_dir: PathBuf,
}

impl VerificationCache {
    pub fn new() -> Self {
        let cache_dir = std::env::temp_dir().join("ferris-proof-cache");
        std::fs::create_dir_all(&cache_dir).ok();

        let persistent_storage = PersistentStorage::new(&cache_dir);

        Self {
            cache_dir: cache_dir.clone(),
            entries: HashMap::new(),
            persistent_storage: Some(persistent_storage),
        }
    }

    pub fn with_cache_dir(cache_dir: PathBuf) -> Self {
        std::fs::create_dir_all(&cache_dir).ok();

        let persistent_storage = PersistentStorage::new(&cache_dir);

        Self {
            cache_dir: cache_dir.clone(),
            entries: HashMap::new(),
            persistent_storage: Some(persistent_storage),
        }
    }

    /// Get cache entry, checking for expiration and validity
    pub fn get(&self, key: &CacheKey) -> Option<&CacheEntry> {
        if let Some(entry) = self.entries.get(key) {
            if self.is_entry_valid(entry) {
                return Some(entry);
            }
        }
        None
    }

    /// Check if a cache entry is still valid (not expired)
    ///
    /// A timestamp in the future yields a negative age, `to_std()` fails,
    /// and the `Duration::MAX` fallback makes the entry count as expired.
    fn is_entry_valid(&self, entry: &CacheEntry) -> bool {
        let now = chrono::Utc::now();
        let age = now.signed_duration_since(entry.timestamp);
        age.to_std().unwrap_or(std::time::Duration::MAX) < entry.ttl
    }

    /// Store cache entry with TTL and automatic persistence
    pub fn store(&mut self, key: CacheKey, entry: CacheEntry) {
        self.entries.insert(key.clone(), entry.clone());

        // Persist to disk if persistent storage is available
        if let Some(storage) = &self.persistent_storage {
            if let Err(e) = storage.store(&key, &entry) {
                tracing::warn!("Failed to persist cache entry: {}", e);
            }
        }
    }

    /// Invalidate cache entry (remove from memory and disk)
    pub fn invalidate(&mut self, key: &CacheKey) {
        self.entries.remove(key);

        if let Some(storage) = &self.persistent_storage {
            if let Err(e) = storage.remove(key) {
                tracing::warn!("Failed to remove cache entry from disk: {}", e);
            }
        }
    }

    /// Clear all cache entries (memory and disk)
    pub fn clear(&mut self) {
        self.entries.clear();

        if let Some(storage) = &self.persistent_storage {
            if let Err(e) = storage.clear() {
                tracing::warn!("Failed to clear cache from disk: {}", e);
            }
        }
    }

    /// Load cache from persistent storage with validation
    pub fn load_from_disk(&mut self) -> Result<()> {
        if let Some(storage) = &self.persistent_storage {
            let loaded_entries = storage.load_all()?;

            // Filter out expired entries during load
            let _now = chrono::Utc::now();
            for (key, entry) in loaded_entries {
                if self.is_entry_valid(&entry) {
                    self.entries.insert(key, entry);
                } else {
                    // Remove expired entries from disk
                    let _ = storage.remove(&key);
                }
            }
        }
        Ok(())
    }

    /// Save cache to persistent storage
    pub fn save_to_disk(&self) -> Result<()> {
        if let Some(storage) = &self.persistent_storage {
            storage.save_all(&self.entries)?;
        }
        Ok(())
    }

    /// Cleanup expired entries from memory and disk
    ///
    /// Returns the number of entries removed.
    pub fn cleanup_expired(&mut self) -> Result<usize> {
        let now = chrono::Utc::now();
        let mut expired_keys = Vec::new();

        for (key, entry) in &self.entries {
            let age = now.signed_duration_since(entry.timestamp);
            if age.to_std().unwrap_or(std::time::Duration::MAX) >= entry.ttl {
                expired_keys.push(key.clone());
            }
        }

        let expired_count = expired_keys.len();
        for key in expired_keys {
            self.invalidate(&key);
        }

        Ok(expired_count)
    }

619 /// Get comprehensive cache statistics 620 pub fn statistics(&self) -> CacheStatistics { 621 let total_entries = self.entries.len(); 622 let mut expired_entries = 0; 623 let mut total_size = 0u64; 624 let now = chrono::Utc::now(); 625 626 for entry in self.entries.values() { 627 let age = now.signed_duration_since(entry.timestamp); 628 if age.to_std().unwrap_or(std::time::Duration::MAX) >= entry.ttl { 629 expired_entries += 1; 630 } 631 total_size += entry.metadata.file_size; 632 } 633 634 CacheStatistics { 635 total_entries, 636 expired_entries, 637 valid_entries: total_entries - expired_entries, 638 total_size_bytes: total_size, 639 cache_dir: self.cache_dir.clone(), 640 } 641 } 642 643 /// Calculate cache hit rate 644 pub fn hit_rate(&self, hits: u64, misses: u64) -> f64 { 645 if hits + misses == 0 { 646 0.0 647 } else { 648 hits as f64 / (hits + misses) as f64 649 } 650 } 651 652 /// Validate cache integrity and return any errors found 653 pub fn validate_integrity(&self) -> Result<Vec<String>> { 654 if let Some(storage) = &self.persistent_storage { 655 storage.validate() 656 } else { 657 Ok(Vec::new()) 658 } 659 } 660 661 /// Get total cache size on disk 662 pub fn disk_size(&self) -> Result<u64> { 663 if let Some(storage) = &self.persistent_storage { 664 storage.cache_size() 665 } else { 666 Ok(0) 667 } 668 } 669 670 /// Compact cache by removing expired entries and optimizing storage 671 pub fn compact(&mut self) -> Result<CompactionResult> { 672 let initial_entries = self.entries.len(); 673 let initial_size = self.disk_size().unwrap_or(0); 674 675 // Remove expired entries 676 let expired_removed = self.cleanup_expired()?; 677 678 // Save compacted cache to disk 679 self.save_to_disk()?; 680 681 let final_entries = self.entries.len(); 682 let final_size = self.disk_size().unwrap_or(0); 683 684 Ok(CompactionResult { 685 entries_before: initial_entries, 686 entries_after: final_entries, 687 entries_removed: expired_removed, 688 size_before: initial_size, 
689 size_after: final_size, 690 size_saved: initial_size.saturating_sub(final_size), 691 }) 692 } 693 } 694 695 impl PersistentStorage { 696 fn new(cache_dir: &Path) -> Self { 697 Self { 698 cache_dir: cache_dir.to_path_buf(), 699 } 700 } 701 702 fn store(&self, key: &CacheKey, entry: &CacheEntry) -> Result<()> { 703 let file_name = self.key_to_filename(key); 704 let file_path = self.cache_dir.join(file_name); 705 706 // Create cache directory if it doesn't exist 707 std::fs::create_dir_all(&self.cache_dir)?; 708 709 let serialized = bincode::serialize(&(key, entry))?; 710 711 // Use zstd compression with level 3 for good balance of speed/compression 712 let compressed = zstd::encode_all(serialized.as_slice(), 3)?; 713 714 // Write atomically using a temporary file 715 let temp_path = file_path.with_extension("tmp"); 716 std::fs::write(&temp_path, compressed)?; 717 std::fs::rename(temp_path, file_path)?; 718 719 Ok(()) 720 } 721 722 fn remove(&self, key: &CacheKey) -> Result<()> { 723 let file_name = self.key_to_filename(key); 724 let file_path = self.cache_dir.join(file_name); 725 726 if file_path.exists() { 727 std::fs::remove_file(file_path)?; 728 } 729 Ok(()) 730 } 731 732 fn load_all(&self) -> Result<HashMap<CacheKey, CacheEntry>> { 733 let mut entries = HashMap::new(); 734 735 if !self.cache_dir.exists() { 736 return Ok(entries); 737 } 738 739 // Recursively search for cache files in subdirectories 740 fn visit_dir( 741 dir: &Path, 742 entries: &mut HashMap<CacheKey, CacheEntry>, 743 storage: &PersistentStorage, 744 ) -> Result<()> { 745 for entry in std::fs::read_dir(dir)? 
{ 746 let entry = entry?; 747 let path = entry.path(); 748 749 if path.is_dir() { 750 // Recursively visit subdirectories 751 visit_dir(&path, entries, storage)?; 752 } else if path.is_file() 753 && path.extension().and_then(|s| s.to_str()) == Some("cache") 754 { 755 match storage.load_entry(&path) { 756 Ok((key, cache_entry)) => { 757 entries.insert(key, cache_entry); 758 } 759 Err(e) => { 760 tracing::warn!("Failed to load cache entry {:?}: {}", path, e); 761 // Optionally remove corrupted cache files 762 let _ = std::fs::remove_file(&path); 763 } 764 } 765 } 766 } 767 Ok(()) 768 } 769 770 visit_dir(&self.cache_dir, &mut entries, self)?; 771 Ok(entries) 772 } 773 774 fn save_all(&self, entries: &HashMap<CacheKey, CacheEntry>) -> Result<()> { 775 std::fs::create_dir_all(&self.cache_dir)?; 776 777 for (key, entry) in entries { 778 if let Err(e) = self.store(key, entry) { 779 tracing::warn!("Failed to save cache entry: {}", e); 780 // Continue with other entries even if one fails 781 } 782 } 783 Ok(()) 784 } 785 786 fn clear(&self) -> Result<()> { 787 if !self.cache_dir.exists() { 788 return Ok(()); 789 } 790 791 // Recursively remove all cache files 792 fn visit_dir(dir: &Path) -> Result<()> { 793 for entry in std::fs::read_dir(dir)? 
{ 794 let entry = entry?; 795 let path = entry.path(); 796 797 if path.is_dir() { 798 // Recursively visit subdirectories 799 visit_dir(&path)?; 800 // Try to remove empty directory 801 let _ = std::fs::remove_dir(&path); 802 } else if path.is_file() 803 && path.extension().and_then(|s| s.to_str()) == Some("cache") 804 { 805 if let Err(e) = std::fs::remove_file(&path) { 806 tracing::warn!("Failed to remove cache file {:?}: {}", path, e); 807 } 808 } 809 } 810 Ok(()) 811 } 812 813 visit_dir(&self.cache_dir)?; 814 Ok(()) 815 } 816 817 fn load_entry(&self, path: &Path) -> Result<(CacheKey, CacheEntry)> { 818 let compressed = std::fs::read(path)?; 819 let serialized = zstd::decode_all(compressed.as_slice())?; 820 let (key, entry): (CacheKey, CacheEntry) = bincode::deserialize(&serialized)?; 821 Ok((key, entry)) 822 } 823 824 /// Generate content-addressed filename from cache key 825 fn key_to_filename(&self, key: &CacheKey) -> String { 826 use std::collections::hash_map::DefaultHasher; 827 use std::hash::{Hash, Hasher}; 828 829 let mut hasher = DefaultHasher::new(); 830 key.hash(&mut hasher); 831 let hash = hasher.finish(); 832 833 // Use content-addressed storage: first two hex digits as subdirectory 834 let subdir = format!("{:02x}", (hash >> 56) & 0xFF); 835 let filename = format!("{:016x}.cache", hash); 836 837 // Create subdirectory path 838 let subdir_path = self.cache_dir.join(&subdir); 839 if let Err(e) = std::fs::create_dir_all(&subdir_path) { 840 tracing::warn!("Failed to create cache subdirectory {}: {}", subdir, e); 841 } 842 843 format!("{}/{}", subdir, filename) 844 } 845 846 /// Get cache directory size in bytes 847 pub fn cache_size(&self) -> Result<u64> { 848 let mut total_size = 0u64; 849 850 if !self.cache_dir.exists() { 851 return Ok(0); 852 } 853 854 fn visit_dir(dir: &Path, total: &mut u64) -> Result<()> { 855 for entry in std::fs::read_dir(dir)? 
{ 856 let entry = entry?; 857 let path = entry.path(); 858 859 if path.is_dir() { 860 visit_dir(&path, total)?; 861 } else if path.is_file() { 862 if let Ok(metadata) = entry.metadata() { 863 *total += metadata.len(); 864 } 865 } 866 } 867 Ok(()) 868 } 869 870 visit_dir(&self.cache_dir, &mut total_size)?; 871 Ok(total_size) 872 } 873 874 /// Validate cache integrity 875 pub fn validate(&self) -> Result<Vec<String>> { 876 let mut errors = Vec::new(); 877 878 if !self.cache_dir.exists() { 879 return Ok(errors); 880 } 881 882 for entry in std::fs::read_dir(&self.cache_dir)? { 883 let entry = entry?; 884 let path = entry.path(); 885 886 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("cache") { 887 if let Err(e) = self.load_entry(&path) { 888 errors.push(format!("Corrupted cache file {:?}: {}", path, e)); 889 } 890 } 891 } 892 893 Ok(errors) 894 } 895 } 896 897 #[derive(Debug, Clone)] 898 pub struct CacheStatistics { 899 pub total_entries: usize, 900 pub expired_entries: usize, 901 pub valid_entries: usize, 902 pub total_size_bytes: u64, 903 pub cache_dir: PathBuf, 904 } 905 906 #[derive(Debug, Clone)] 907 pub struct CompactionResult { 908 pub entries_before: usize, 909 pub entries_after: usize, 910 pub entries_removed: usize, 911 pub size_before: u64, 912 pub size_after: u64, 913 pub size_saved: u64, 914 } 915 916 impl Default for VerificationCache { 917 fn default() -> Self { 918 Self::new() 919 } 920 }