/ ferris-proof-core / src / cache.rs
cache.rs
  1  use crate::types::*;
  2  use anyhow::{anyhow, Result};
  3  use blake3::Hasher;
  4  use quote::ToTokens;
  5  use regex::Regex;
  6  use serde::{Deserialize, Serialize};
  7  use std::collections::HashMap;
  8  use std::path::{Path, PathBuf};
  9  use syn::{parse_file, Attribute, File, Item, ItemEnum, ItemFn, ItemMod, ItemStruct};
 10  
 11  #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
 12  pub struct CacheKey {
 13      pub content_hash: ContentHash,
 14      pub config_hash: ConfigHash,
 15      pub tool_versions: ToolVersions,
 16      pub layer: Layer,
 17  }
 18  
 19  impl CacheKey {
 20      /// Create a new cache key for the given target and layer
 21      pub fn new(
 22          target: &crate::verification::Target,
 23          layer: Layer,
 24          config_hash: &str,
 25      ) -> Result<Self> {
 26          let content_hash = Self::compute_content_hash(target)?;
 27          let tool_versions = Self::get_tool_versions()?;
 28  
 29          Ok(Self {
 30              content_hash,
 31              config_hash: ConfigHash(config_hash.to_string()),
 32              tool_versions,
 33              layer,
 34          })
 35      }
 36  
 37      /// Compute content hash for a verification target
 38      fn compute_content_hash(target: &crate::verification::Target) -> Result<ContentHash> {
 39          let mut hasher = Hasher::new();
 40  
 41          match target {
 42              crate::verification::Target::RustFile(path) => {
 43                  let ast = parse_file(
 44                      &std::fs::read_to_string(path)
 45                          .map_err(|e| anyhow!("Failed to read Rust file {:?}: {}", path, e))?,
 46                  )?;
 47                  let normalized = Self::normalize_ast(&ast);
 48                  hasher.update(normalized.as_bytes());
 49              }
 50              crate::verification::Target::FormalSpec(path) => {
 51                  let spec = std::fs::read_to_string(path)
 52                      .map_err(|e| anyhow!("Failed to read formal spec {:?}: {}", path, e))?;
 53                  let normalized = Self::normalize_spec(&spec);
 54                  hasher.update(normalized.as_bytes());
 55              }
 56              crate::verification::Target::Module(module_path) => {
 57                  // For modules, we hash the module path directly
 58                  // TODO: Implement module content hashing
 59                  hasher.update(module_path.as_bytes());
 60              }
 61          }
 62  
 63          Ok(ContentHash(hex::encode(hasher.finalize().as_bytes())))
 64      }
 65  
 66      /// Normalize Rust AST by removing comments, whitespace, and other irrelevant details
 67      fn normalize_ast(ast: &File) -> String {
 68          let mut normalized_items = Vec::new();
 69  
 70          for item in &ast.items {
 71              match item {
 72                  Item::Fn(item_fn) => {
 73                      // Normalize function signature and body structure
 74                      let normalized_item = Self::normalize_function(item_fn);
 75                      normalized_items.push(Item::Fn(normalized_item));
 76                  }
 77                  Item::Struct(item_struct) => {
 78                      // Normalize struct definition
 79                      let normalized_item = Self::normalize_struct(item_struct);
 80                      normalized_items.push(Item::Struct(normalized_item));
 81                  }
 82                  Item::Enum(item_enum) => {
 83                      // Normalize enum definition
 84                      let normalized_item = Self::normalize_enum(item_enum);
 85                      normalized_items.push(Item::Enum(normalized_item));
 86                  }
 87                  Item::Mod(item_mod) => {
 88                      // Normalize module declaration
 89                      let normalized_item = Self::normalize_module(item_mod);
 90                      normalized_items.push(Item::Mod(normalized_item));
 91                  }
 92                  _ => {
 93                      // Include other items as-is for now
 94                      normalized_items.push(item.clone());
 95                  }
 96              }
 97          }
 98  
 99          // Reconstruct the normalized AST and convert to string
100          let normalized_file = File {
101              shebang: ast.shebang.clone(),
102              attrs: ast.attrs.clone(),
103              items: normalized_items,
104          };
105  
106          normalized_file.to_token_stream().to_string()
107      }
108  
109      /// Normalize a function item
110      fn normalize_function(item_fn: &ItemFn) -> ItemFn {
111          // Remove attributes that don't affect verification
112          let attrs: Vec<Attribute> = item_fn
113              .attrs
114              .iter()
115              .filter(|attr| {
116                  // Keep verification attributes, remove others like #[cfg(test)]
117                  attr.path().is_ident("verification")
118                      || !attr
119                          .path()
120                          .segments
121                          .first()
122                          .map(|seg| seg.ident == "cfg")
123                          .unwrap_or(false)
124              })
125              .cloned()
126              .collect();
127  
128          // Normalize function body by removing comments and normalizing whitespace
129          let normalized_block = Self::normalize_block(&item_fn.block);
130  
131          ItemFn {
132              attrs,
133              vis: item_fn.vis.clone(),
134              sig: item_fn.sig.clone(),
135              block: Box::new(normalized_block),
136          }
137      }
138  
139      /// Normalize a block by removing comments and normalizing structure
140      fn normalize_block(block: &syn::Block) -> syn::Block {
141          use syn::Stmt;
142  
143          let mut normalized_stmts = Vec::new();
144  
145          for stmt in &block.stmts {
146              match stmt {
147                  Stmt::Local(_local) => {
148                      // Keep local variable declarations as-is for now
149                      normalized_stmts.push(stmt.clone());
150                  }
151                  Stmt::Item(_item) => {
152                      // Recursively normalize nested items
153                      normalized_stmts.push(stmt.clone());
154                  }
155                  Stmt::Expr(expr, semi) => {
156                      // Keep expressions but could normalize further
157                      normalized_stmts.push(Stmt::Expr(expr.clone(), *semi));
158                  }
159                  Stmt::Macro(_mac) => {
160                      // Keep macro calls as-is
161                      normalized_stmts.push(stmt.clone());
162                  }
163              }
164          }
165  
166          syn::Block {
167              brace_token: block.brace_token,
168              stmts: normalized_stmts,
169          }
170      }
171  
172      /// Normalize a struct item
173      fn normalize_struct(item_struct: &ItemStruct) -> ItemStruct {
174          // Remove attributes that don't affect verification
175          let attrs: Vec<Attribute> = item_struct
176              .attrs
177              .iter()
178              .filter(|attr| attr.path().is_ident("verification"))
179              .cloned()
180              .collect();
181  
182          ItemStruct {
183              attrs,
184              vis: item_struct.vis.clone(),
185              struct_token: item_struct.struct_token,
186              ident: item_struct.ident.clone(),
187              generics: item_struct.generics.clone(),
188              fields: item_struct.fields.clone(),
189              semi_token: item_struct.semi_token,
190          }
191      }
192  
193      /// Normalize an enum item
194      fn normalize_enum(item_enum: &ItemEnum) -> ItemEnum {
195          // Remove attributes that don't affect verification
196          let attrs: Vec<Attribute> = item_enum
197              .attrs
198              .iter()
199              .filter(|attr| attr.path().is_ident("verification"))
200              .cloned()
201              .collect();
202  
203          ItemEnum {
204              attrs,
205              vis: item_enum.vis.clone(),
206              enum_token: item_enum.enum_token,
207              ident: item_enum.ident.clone(),
208              generics: item_enum.generics.clone(),
209              brace_token: item_enum.brace_token,
210              variants: item_enum.variants.clone(),
211          }
212      }
213  
214      /// Normalize a module item
215      fn normalize_module(item_mod: &ItemMod) -> ItemMod {
216          // Remove attributes that don't affect verification
217          let attrs: Vec<Attribute> = item_mod
218              .attrs
219              .iter()
220              .filter(|attr| attr.path().is_ident("verification"))
221              .cloned()
222              .collect();
223  
224          ItemMod {
225              attrs,
226              vis: item_mod.vis.clone(),
227              unsafety: item_mod.unsafety,
228              mod_token: item_mod.mod_token,
229              ident: item_mod.ident.clone(),
230              content: item_mod.content.clone(),
231              semi: item_mod.semi,
232          }
233      }
234  
235      /// Normalize formal specification content
236      fn normalize_spec(spec: &str) -> String {
237          // Remove comments and normalize whitespace for TLA+ and Alloy specs
238          let mut normalized = String::new();
239          let mut chars = spec.chars().peekable();
240          let mut in_line_comment = false;
241          let mut in_block_comment = false;
242          let mut in_string = false;
243          let mut escape_next = false;
244  
245          while let Some(char) = chars.next() {
246              if escape_next {
247                  if in_string {
248                      normalized.push('\\');
249                      normalized.push(char);
250                  }
251                  escape_next = false;
252                  continue;
253              }
254  
255              match char {
256                  '\\' if in_string => {
257                      escape_next = true;
258                      continue;
259                  }
260                  '"' if !in_line_comment && !in_block_comment => {
261                      in_string = !in_string;
262                      normalized.push(char);
263                  }
264                  '/' if !in_string && !in_line_comment && !in_block_comment => {
265                      if let Some(&next_char) = chars.peek() {
266                          if next_char == '/' {
267                              // Start of line comment
268                              chars.next(); // consume the second '/'
269                              in_line_comment = true;
270                              continue;
271                          } else if next_char == '*' {
272                              // Start of block comment
273                              chars.next(); // consume the '*'
274                              in_block_comment = true;
275                              continue;
276                          }
277                      }
278                      normalized.push(char);
279                  }
280                  '*' if in_block_comment && !in_string => {
281                      if let Some(&next_char) = chars.peek() {
282                          if next_char == '/' {
283                              // End of block comment
284                              chars.next(); // consume the '/'
285                              in_block_comment = false;
286                              continue;
287                          }
288                      }
289                  }
290                  '\n' | '\r' => {
291                      if in_line_comment {
292                          in_line_comment = false;
293                      }
294                      if !in_block_comment && !in_string {
295                          // Normalize line breaks to single space
296                          if !normalized.ends_with(' ') && !normalized.is_empty() {
297                              normalized.push(' ');
298                          }
299                      }
300                  }
301                  c if c.is_whitespace() => {
302                      if !in_line_comment && !in_block_comment && !in_string {
303                          // Normalize whitespace to single space
304                          if !normalized.ends_with(' ') && !normalized.is_empty() {
305                              normalized.push(' ');
306                          }
307                      }
308                  }
309                  c => {
310                      if !in_line_comment && !in_block_comment {
311                          normalized.push(c);
312                      }
313                  }
314              }
315          }
316  
317          normalized.trim().to_string()
318      }
319  
320      /// Get current tool versions for cache invalidation
321      fn get_tool_versions() -> Result<ToolVersions> {
322          let ferris_proof_version = env!("CARGO_PKG_VERSION").to_string();
323          let mut external_tools = Vec::new();
324  
325          // Check for common external tools with their version commands
326          let tools_to_check = vec![
327              ("tlc", "TLA+ TLC", vec!["--version", "-version"]),
328              ("java", "Java (for TLA+)", vec!["-version"]),
329              ("alloy", "Alloy Analyzer", vec!["--version"]),
330              ("kani", "Kani Verifier", vec!["--version"]),
331              ("cargo", "Cargo", vec!["--version"]),
332              ("rustc", "Rust Compiler", vec!["--version"]),
333          ];
334  
335          for (tool_name, display_name, version_args) in tools_to_check {
336              if let Ok(version) = Self::get_tool_version_with_args(tool_name, &version_args) {
337                  external_tools.push((display_name.to_string(), version));
338              }
339          }
340  
341          // Add Rust toolchain information
342          if let Ok(rustc_version) = Self::get_rustc_commit_hash() {
343              external_tools.push(("Rust Commit".to_string(), rustc_version));
344          }
345  
346          Ok(ToolVersions {
347              ferris_proof: ferris_proof_version,
348              external_tools,
349          })
350      }
351  
352      /// Get version of an external tool with multiple possible version arguments
353      fn get_tool_version_with_args(tool_name: &str, version_args: &[&str]) -> Result<String> {
354          for &arg in version_args {
355              if let Ok(version) = Self::get_tool_version_with_arg(tool_name, arg) {
356                  return Ok(version);
357              }
358          }
359          Err(anyhow!(
360              "Could not determine version for tool: {}",
361              tool_name
362          ))
363      }
364  
365      /// Get version of an external tool with a specific argument
366      fn get_tool_version_with_arg(tool_name: &str, version_arg: &str) -> Result<String> {
367          let output = std::process::Command::new(tool_name)
368              .arg(version_arg)
369              .output();
370  
371          match output {
372              Ok(result) => {
373                  let stdout = String::from_utf8_lossy(&result.stdout);
374                  let stderr = String::from_utf8_lossy(&result.stderr);
375  
376                  // Try stdout first, then stderr (some tools output version to stderr)
377                  let version_text = if !stdout.trim().is_empty() {
378                      stdout
379                  } else {
380                      stderr
381                  };
382  
383                  // Extract version string (improved parsing)
384                  let version_line = version_text.lines().next().unwrap_or("");
385                  let version = Self::extract_version_from_line(version_line);
386  
387                  if !version.is_empty() {
388                      Ok(version)
389                  } else {
390                      Err(anyhow!(
391                          "Could not parse version from output: {}",
392                          version_line
393                      ))
394                  }
395              }
396              Err(e) => Err(anyhow!("Failed to execute tool {}: {}", tool_name, e)),
397          }
398      }
399  
400      /// Extract version string from a line of text
401      fn extract_version_from_line(line: &str) -> String {
402          // Look for semantic version patterns (x.y.z)
403          if let Ok(version_regex) = Regex::new(r"\b(\d+\.\d+\.\d+(?:-[a-zA-Z0-9.-]+)?)\b") {
404              if let Some(captures) = version_regex.captures(line) {
405                  return captures.get(1).unwrap().as_str().to_string();
406              }
407          }
408  
409          // Look for simpler version patterns (x.y)
410          if let Ok(simple_version_regex) = Regex::new(r"\b(\d+\.\d+)\b") {
411              if let Some(captures) = simple_version_regex.captures(line) {
412                  return captures.get(1).unwrap().as_str().to_string();
413              }
414          }
415  
416          // Fallback: look for any sequence of digits and dots
417          if let Ok(fallback_regex) = Regex::new(r"\b(\d+(?:\.\d+)*)\b") {
418              if let Some(captures) = fallback_regex.captures(line) {
419                  return captures.get(1).unwrap().as_str().to_string();
420              }
421          }
422  
423          "unknown".to_string()
424      }
425  
426      /// Get Rust compiler commit hash for more precise cache invalidation
427      fn get_rustc_commit_hash() -> Result<String> {
428          let output = std::process::Command::new("rustc")
429              .arg("--version")
430              .arg("--verbose")
431              .output();
432  
433          match output {
434              Ok(result) if result.status.success() => {
435                  let stdout = String::from_utf8_lossy(&result.stdout);
436                  for line in stdout.lines() {
437                      if line.starts_with("commit-hash:") {
438                          return Ok(line
439                              .split(':')
440                              .nth(1)
441                              .unwrap_or("unknown")
442                              .trim()
443                              .to_string());
444                      }
445                  }
446                  Err(anyhow!("Could not find commit hash in rustc output"))
447              }
448              Ok(result) => Err(anyhow!(
449                  "rustc returned non-zero exit code: {}",
450                  result.status
451              )),
452              Err(e) => Err(anyhow!("Failed to execute rustc: {}", e)),
453          }
454      }
455  }
456  
457  #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
458  pub struct ContentHash(pub String);
459  
460  #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
461  pub struct ConfigHash(pub String);
462  
463  #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
464  pub struct ToolVersions {
465      pub ferris_proof: String,
466      pub external_tools: Vec<(String, String)>, // Use Vec instead of HashMap for Hash trait
467  }
468  
469  pub struct VerificationCache {
470      cache_dir: PathBuf,
471      entries: HashMap<CacheKey, CacheEntry>,
472      persistent_storage: Option<PersistentStorage>,
473  }
474  
475  #[derive(Debug, Clone, Serialize, Deserialize)]
476  pub struct CacheEntry {
477      pub result: LayerResult,
478      pub timestamp: chrono::DateTime<chrono::Utc>,
479      pub ttl: std::time::Duration,
480      pub metadata: CacheMetadata,
481  }
482  
483  #[derive(Debug, Clone, Serialize, Deserialize)]
484  pub struct CacheMetadata {
485      pub file_size: u64,
486      pub execution_time: std::time::Duration,
487      pub memory_usage: u64,
488      pub cache_hit_count: u32,
489  }
490  
/// On-disk backend that stores one file per cache entry.
pub struct PersistentStorage {
    /// Root directory under which the cache files are written.
    cache_dir: PathBuf,
}
494  
495  impl VerificationCache {
496      pub fn new() -> Self {
497          let cache_dir = std::env::temp_dir().join("ferris-proof-cache");
498          std::fs::create_dir_all(&cache_dir).ok();
499  
500          let persistent_storage = PersistentStorage::new(&cache_dir);
501  
502          Self {
503              cache_dir: cache_dir.clone(),
504              entries: HashMap::new(),
505              persistent_storage: Some(persistent_storage),
506          }
507      }
508  
509      pub fn with_cache_dir(cache_dir: PathBuf) -> Self {
510          std::fs::create_dir_all(&cache_dir).ok();
511  
512          let persistent_storage = PersistentStorage::new(&cache_dir);
513  
514          Self {
515              cache_dir: cache_dir.clone(),
516              entries: HashMap::new(),
517              persistent_storage: Some(persistent_storage),
518          }
519      }
520  
521      /// Get cache entry, checking for expiration and validity
522      pub fn get(&self, key: &CacheKey) -> Option<&CacheEntry> {
523          if let Some(entry) = self.entries.get(key) {
524              if self.is_entry_valid(entry) {
525                  return Some(entry);
526              }
527          }
528          None
529      }
530  
531      /// Check if a cache entry is still valid (not expired)
532      fn is_entry_valid(&self, entry: &CacheEntry) -> bool {
533          let now = chrono::Utc::now();
534          let age = now.signed_duration_since(entry.timestamp);
535          age.to_std().unwrap_or(std::time::Duration::MAX) < entry.ttl
536      }
537  
538      /// Store cache entry with TTL and automatic persistence
539      pub fn store(&mut self, key: CacheKey, entry: CacheEntry) {
540          self.entries.insert(key.clone(), entry.clone());
541  
542          // Persist to disk if persistent storage is available
543          if let Some(storage) = &self.persistent_storage {
544              if let Err(e) = storage.store(&key, &entry) {
545                  tracing::warn!("Failed to persist cache entry: {}", e);
546              }
547          }
548      }
549  
550      /// Invalidate cache entry (remove from memory and disk)
551      pub fn invalidate(&mut self, key: &CacheKey) {
552          self.entries.remove(key);
553  
554          if let Some(storage) = &self.persistent_storage {
555              if let Err(e) = storage.remove(key) {
556                  tracing::warn!("Failed to remove cache entry from disk: {}", e);
557              }
558          }
559      }
560  
561      /// Clear all cache entries (memory and disk)
562      pub fn clear(&mut self) {
563          self.entries.clear();
564  
565          if let Some(storage) = &self.persistent_storage {
566              if let Err(e) = storage.clear() {
567                  tracing::warn!("Failed to clear cache from disk: {}", e);
568              }
569          }
570      }
571  
572      /// Load cache from persistent storage with validation
573      pub fn load_from_disk(&mut self) -> Result<()> {
574          if let Some(storage) = &self.persistent_storage {
575              let loaded_entries = storage.load_all()?;
576  
577              // Filter out expired entries during load
578              let _now = chrono::Utc::now();
579              for (key, entry) in loaded_entries {
580                  if self.is_entry_valid(&entry) {
581                      self.entries.insert(key, entry);
582                  } else {
583                      // Remove expired entries from disk
584                      let _ = storage.remove(&key);
585                  }
586              }
587          }
588          Ok(())
589      }
590  
591      /// Save cache to persistent storage
592      pub fn save_to_disk(&self) -> Result<()> {
593          if let Some(storage) = &self.persistent_storage {
594              storage.save_all(&self.entries)?;
595          }
596          Ok(())
597      }
598  
599      /// Cleanup expired entries from memory and disk
600      pub fn cleanup_expired(&mut self) -> Result<usize> {
601          let now = chrono::Utc::now();
602          let mut expired_keys = Vec::new();
603  
604          for (key, entry) in &self.entries {
605              let age = now.signed_duration_since(entry.timestamp);
606              if age.to_std().unwrap_or(std::time::Duration::MAX) >= entry.ttl {
607                  expired_keys.push(key.clone());
608              }
609          }
610  
611          let expired_count = expired_keys.len();
612          for key in expired_keys {
613              self.invalidate(&key);
614          }
615  
616          Ok(expired_count)
617      }
618  
619      /// Get comprehensive cache statistics
620      pub fn statistics(&self) -> CacheStatistics {
621          let total_entries = self.entries.len();
622          let mut expired_entries = 0;
623          let mut total_size = 0u64;
624          let now = chrono::Utc::now();
625  
626          for entry in self.entries.values() {
627              let age = now.signed_duration_since(entry.timestamp);
628              if age.to_std().unwrap_or(std::time::Duration::MAX) >= entry.ttl {
629                  expired_entries += 1;
630              }
631              total_size += entry.metadata.file_size;
632          }
633  
634          CacheStatistics {
635              total_entries,
636              expired_entries,
637              valid_entries: total_entries - expired_entries,
638              total_size_bytes: total_size,
639              cache_dir: self.cache_dir.clone(),
640          }
641      }
642  
643      /// Calculate cache hit rate
644      pub fn hit_rate(&self, hits: u64, misses: u64) -> f64 {
645          if hits + misses == 0 {
646              0.0
647          } else {
648              hits as f64 / (hits + misses) as f64
649          }
650      }
651  
652      /// Validate cache integrity and return any errors found
653      pub fn validate_integrity(&self) -> Result<Vec<String>> {
654          if let Some(storage) = &self.persistent_storage {
655              storage.validate()
656          } else {
657              Ok(Vec::new())
658          }
659      }
660  
661      /// Get total cache size on disk
662      pub fn disk_size(&self) -> Result<u64> {
663          if let Some(storage) = &self.persistent_storage {
664              storage.cache_size()
665          } else {
666              Ok(0)
667          }
668      }
669  
670      /// Compact cache by removing expired entries and optimizing storage
671      pub fn compact(&mut self) -> Result<CompactionResult> {
672          let initial_entries = self.entries.len();
673          let initial_size = self.disk_size().unwrap_or(0);
674  
675          // Remove expired entries
676          let expired_removed = self.cleanup_expired()?;
677  
678          // Save compacted cache to disk
679          self.save_to_disk()?;
680  
681          let final_entries = self.entries.len();
682          let final_size = self.disk_size().unwrap_or(0);
683  
684          Ok(CompactionResult {
685              entries_before: initial_entries,
686              entries_after: final_entries,
687              entries_removed: expired_removed,
688              size_before: initial_size,
689              size_after: final_size,
690              size_saved: initial_size.saturating_sub(final_size),
691          })
692      }
693  }
694  
695  impl PersistentStorage {
696      fn new(cache_dir: &Path) -> Self {
697          Self {
698              cache_dir: cache_dir.to_path_buf(),
699          }
700      }
701  
702      fn store(&self, key: &CacheKey, entry: &CacheEntry) -> Result<()> {
703          let file_name = self.key_to_filename(key);
704          let file_path = self.cache_dir.join(file_name);
705  
706          // Create cache directory if it doesn't exist
707          std::fs::create_dir_all(&self.cache_dir)?;
708  
709          let serialized = bincode::serialize(&(key, entry))?;
710  
711          // Use zstd compression with level 3 for good balance of speed/compression
712          let compressed = zstd::encode_all(serialized.as_slice(), 3)?;
713  
714          // Write atomically using a temporary file
715          let temp_path = file_path.with_extension("tmp");
716          std::fs::write(&temp_path, compressed)?;
717          std::fs::rename(temp_path, file_path)?;
718  
719          Ok(())
720      }
721  
722      fn remove(&self, key: &CacheKey) -> Result<()> {
723          let file_name = self.key_to_filename(key);
724          let file_path = self.cache_dir.join(file_name);
725  
726          if file_path.exists() {
727              std::fs::remove_file(file_path)?;
728          }
729          Ok(())
730      }
731  
732      fn load_all(&self) -> Result<HashMap<CacheKey, CacheEntry>> {
733          let mut entries = HashMap::new();
734  
735          if !self.cache_dir.exists() {
736              return Ok(entries);
737          }
738  
739          // Recursively search for cache files in subdirectories
740          fn visit_dir(
741              dir: &Path,
742              entries: &mut HashMap<CacheKey, CacheEntry>,
743              storage: &PersistentStorage,
744          ) -> Result<()> {
745              for entry in std::fs::read_dir(dir)? {
746                  let entry = entry?;
747                  let path = entry.path();
748  
749                  if path.is_dir() {
750                      // Recursively visit subdirectories
751                      visit_dir(&path, entries, storage)?;
752                  } else if path.is_file()
753                      && path.extension().and_then(|s| s.to_str()) == Some("cache")
754                  {
755                      match storage.load_entry(&path) {
756                          Ok((key, cache_entry)) => {
757                              entries.insert(key, cache_entry);
758                          }
759                          Err(e) => {
760                              tracing::warn!("Failed to load cache entry {:?}: {}", path, e);
761                              // Optionally remove corrupted cache files
762                              let _ = std::fs::remove_file(&path);
763                          }
764                      }
765                  }
766              }
767              Ok(())
768          }
769  
770          visit_dir(&self.cache_dir, &mut entries, self)?;
771          Ok(entries)
772      }
773  
774      fn save_all(&self, entries: &HashMap<CacheKey, CacheEntry>) -> Result<()> {
775          std::fs::create_dir_all(&self.cache_dir)?;
776  
777          for (key, entry) in entries {
778              if let Err(e) = self.store(key, entry) {
779                  tracing::warn!("Failed to save cache entry: {}", e);
780                  // Continue with other entries even if one fails
781              }
782          }
783          Ok(())
784      }
785  
786      fn clear(&self) -> Result<()> {
787          if !self.cache_dir.exists() {
788              return Ok(());
789          }
790  
791          // Recursively remove all cache files
792          fn visit_dir(dir: &Path) -> Result<()> {
793              for entry in std::fs::read_dir(dir)? {
794                  let entry = entry?;
795                  let path = entry.path();
796  
797                  if path.is_dir() {
798                      // Recursively visit subdirectories
799                      visit_dir(&path)?;
800                      // Try to remove empty directory
801                      let _ = std::fs::remove_dir(&path);
802                  } else if path.is_file()
803                      && path.extension().and_then(|s| s.to_str()) == Some("cache")
804                  {
805                      if let Err(e) = std::fs::remove_file(&path) {
806                          tracing::warn!("Failed to remove cache file {:?}: {}", path, e);
807                      }
808                  }
809              }
810              Ok(())
811          }
812  
813          visit_dir(&self.cache_dir)?;
814          Ok(())
815      }
816  
817      fn load_entry(&self, path: &Path) -> Result<(CacheKey, CacheEntry)> {
818          let compressed = std::fs::read(path)?;
819          let serialized = zstd::decode_all(compressed.as_slice())?;
820          let (key, entry): (CacheKey, CacheEntry) = bincode::deserialize(&serialized)?;
821          Ok((key, entry))
822      }
823  
824      /// Generate content-addressed filename from cache key
825      fn key_to_filename(&self, key: &CacheKey) -> String {
826          use std::collections::hash_map::DefaultHasher;
827          use std::hash::{Hash, Hasher};
828  
829          let mut hasher = DefaultHasher::new();
830          key.hash(&mut hasher);
831          let hash = hasher.finish();
832  
833          // Use content-addressed storage: first two hex digits as subdirectory
834          let subdir = format!("{:02x}", (hash >> 56) & 0xFF);
835          let filename = format!("{:016x}.cache", hash);
836  
837          // Create subdirectory path
838          let subdir_path = self.cache_dir.join(&subdir);
839          if let Err(e) = std::fs::create_dir_all(&subdir_path) {
840              tracing::warn!("Failed to create cache subdirectory {}: {}", subdir, e);
841          }
842  
843          format!("{}/{}", subdir, filename)
844      }
845  
846      /// Get cache directory size in bytes
847      pub fn cache_size(&self) -> Result<u64> {
848          let mut total_size = 0u64;
849  
850          if !self.cache_dir.exists() {
851              return Ok(0);
852          }
853  
854          fn visit_dir(dir: &Path, total: &mut u64) -> Result<()> {
855              for entry in std::fs::read_dir(dir)? {
856                  let entry = entry?;
857                  let path = entry.path();
858  
859                  if path.is_dir() {
860                      visit_dir(&path, total)?;
861                  } else if path.is_file() {
862                      if let Ok(metadata) = entry.metadata() {
863                          *total += metadata.len();
864                      }
865                  }
866              }
867              Ok(())
868          }
869  
870          visit_dir(&self.cache_dir, &mut total_size)?;
871          Ok(total_size)
872      }
873  
874      /// Validate cache integrity
875      pub fn validate(&self) -> Result<Vec<String>> {
876          let mut errors = Vec::new();
877  
878          if !self.cache_dir.exists() {
879              return Ok(errors);
880          }
881  
882          for entry in std::fs::read_dir(&self.cache_dir)? {
883              let entry = entry?;
884              let path = entry.path();
885  
886              if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("cache") {
887                  if let Err(e) = self.load_entry(&path) {
888                      errors.push(format!("Corrupted cache file {:?}: {}", path, e));
889                  }
890              }
891          }
892  
893          Ok(errors)
894      }
895  }
896  
/// Summary figures describing the state of a cache.
///
/// NOTE(review): the code that fills in this struct is outside this section; the
/// field notes below are inferred from field names and types — confirm against
/// the producer.
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    /// Number of entries counted overall (presumably expired plus valid — TODO confirm).
    pub total_entries: usize,
    /// Number of entries considered expired (expiry rule not visible here).
    pub expired_entries: usize,
    /// Number of entries considered still valid.
    pub valid_entries: usize,
    /// Size in bytes (presumably the on-disk total — TODO confirm the source).
    pub total_size_bytes: u64,
    /// Cache directory these statistics refer to.
    pub cache_dir: PathBuf,
}
905  
/// Before/after figures reported by a cache compaction.
///
/// NOTE(review): the compaction routine that produces this struct is outside this
/// section; the field notes below are inferred from field names and types —
/// confirm against the producer.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// Entry count before compaction.
    pub entries_before: usize,
    /// Entry count after compaction.
    pub entries_after: usize,
    /// Entries dropped (presumably `entries_before - entries_after` — TODO confirm).
    pub entries_removed: usize,
    /// Size before compaction (presumably bytes — TODO confirm unit).
    pub size_before: u64,
    /// Size after compaction (same unit as `size_before`).
    pub size_after: u64,
    /// Space reclaimed (presumably `size_before - size_after` — TODO confirm).
    pub size_saved: u64,
}
915  
916  impl Default for VerificationCache {
917      fn default() -> Self {
918          Self::new()
919      }
920  }