// abzu-inference/src/error.rs
//! Error types for mesh inference.
  2  
  3  use thiserror::Error;
  4  
  5  /// Errors that can occur during mesh inference.
  6  #[derive(Debug, Error)]
  7  pub enum InferenceError {
  8      /// Not enough shards available in the mesh
  9      #[error("insufficient shards: need {required}, have {available}")]
 10      InsufficientShards {
 11          required: usize,
 12          available: usize,
 13      },
 14  
 15      /// Timed out waiting for shard responses
 16      #[error("inference timeout after {0:?}")]
 17      Timeout(std::time::Duration),
 18  
 19      /// Failed to split input into shards
 20      #[error("sharding failed: {0}")]
 21      ShardingFailed(String),
 22  
 23      /// Failed to aggregate partial results
 24      #[error("aggregation failed: need {required} results, got {received}")]
 25      AggregationFailed {
 26          required: usize,
 27          received: usize,
 28      },
 29  
 30      /// Network error during shard communication
 31      #[error("network error: {0}")]
 32      Network(String),
 33  
 34      /// Cryptographic error
 35      #[error("crypto error: {0}")]
 36      Crypto(String),
 37  
 38      /// Invalid shard configuration
 39      #[error("invalid config: {0}")]
 40      InvalidConfig(String),
 41  
 42      /// Shard returned invalid data
 43      #[error("invalid shard response from {node}: {reason}")]
 44      InvalidShardResponse {
 45          node: String,
 46          reason: String,
 47      },
 48  
 49      /// Rate limited by mesh
 50      #[error("rate limited: retry after {seconds}s")]
 51      RateLimited {
 52          seconds: u64,
 53      },
 54  }
 55  
 56  impl InferenceError {
 57      /// Check if this error is retryable
 58      pub fn is_retryable(&self) -> bool {
 59          matches!(
 60              self,
 61              InferenceError::Timeout(_)
 62                  | InferenceError::Network(_)
 63                  | InferenceError::RateLimited { .. }
 64          )
 65      }
 66  
 67      /// Get retry delay if applicable
 68      pub fn retry_delay(&self) -> Option<std::time::Duration> {
 69          match self {
 70              InferenceError::RateLimited { seconds } => {
 71                  Some(std::time::Duration::from_secs(*seconds))
 72              }
 73              InferenceError::Timeout(_) => {
 74                  Some(std::time::Duration::from_secs(1))
 75              }
 76              InferenceError::Network(_) => {
 77                  Some(std::time::Duration::from_millis(500))
 78              }
 79              _ => None,
 80          }
 81      }
 82  }
 83  
 84  #[cfg(test)]
 85  mod tests {
 86      use super::*;
 87  
 88      #[test]
 89      fn test_retryable_errors() {
 90          assert!(InferenceError::Timeout(std::time::Duration::from_secs(5)).is_retryable());
 91          assert!(InferenceError::RateLimited { seconds: 10 }.is_retryable());
 92          assert!(InferenceError::Network("connection reset".into()).is_retryable());
 93          
 94          assert!(!InferenceError::InvalidConfig("bad".into()).is_retryable());
 95          assert!(!InferenceError::ShardingFailed("fail".into()).is_retryable());
 96      }
 97  
 98      #[test]
 99      fn test_retry_delay() {
100          let rate_limited = InferenceError::RateLimited { seconds: 30 };
101          assert_eq!(
102              rate_limited.retry_delay(),
103              Some(std::time::Duration::from_secs(30))
104          );
105  
106          let timeout = InferenceError::Timeout(std::time::Duration::from_secs(10));
107          assert_eq!(
108              timeout.retry_delay(),
109              Some(std::time::Duration::from_secs(1))
110          );
111      }
112  }