/ project / architecture / machine / error-handling.cspec
error-handling.cspec
  1  # Error Handling & Logging Standards
  2  # purpose: Error codes, logging standards, structured logs
  3  # status: draft
  4  # updated: 2026-01-05
  5  
  6  # === ERROR HANDLING PHILOSOPHY ===
  7  philosophy:
  8    principles:
  9      - Fail fast and loud (debug builds)
 10      - Fail gracefully (release builds)
 11      - Provide actionable error messages
 12      - Never expose internal details to users
 13      - Log everything needed for debugging
 14  
 15    rust_patterns:
 16      prefer: Result<T, E> over panic!
 17      use_anyhow: For application errors (bin crates)
 18      use_thiserror: For library errors (lib crates)
 19  
 20  # === ERROR CATEGORIES ===
 21  categories:
 22    user_error:
 23      description: Invalid user input or request
 24      response: 4xx HTTP, user-friendly message
 25      logging: INFO level
 26      examples:
 27        - Invalid address format
 28        - Insufficient balance
 29        - Invalid signature
 30  
 31    system_error:
 32      description: Internal system failure
 33      response: 5xx HTTP, generic message
 34      logging: ERROR level, full stack trace
 35      examples:
 36        - Database connection failed
 37        - Out of memory
 38        - Unexpected panic
 39  
 40    network_error:
 41      description: Communication failures
  42      response: Retry with exponential backoff (see recovery.patterns.retry)
 43      logging: WARN level
 44      examples:
 45        - Peer disconnected
 46        - Timeout
 47        - DNS resolution failed
 48  
 49    consensus_error:
 50      description: Protocol violations
 51      response: Reject, possibly slash
 52      logging: ERROR level, detailed context
 53      examples:
 54        - Invalid block
 55        - Double vote
 56        - Invalid state transition
 57  
 58  # === ERROR CODES ===
 59  error_codes:
 60    format: "{CATEGORY}_{NUMBER}"
 61  
 62    ranges:
 63      AUTH_1xx: Authentication errors
 64      VALID_2xx: Validation errors
 65      TX_3xx: Transaction errors
 66      CONSENSUS_4xx: Consensus errors
 67      NETWORK_5xx: Network errors
 68      INTERNAL_9xx: Internal errors
 69  
 70    examples:
 71      AUTH_101: {code: "AUTH_101", message: "Invalid API key"}
 72      AUTH_102: {code: "AUTH_102", message: "Token expired"}
 73      VALID_201: {code: "VALID_201", message: "Invalid address format"}
 74      VALID_202: {code: "VALID_202", message: "Amount must be positive"}
 75      TX_301: {code: "TX_301", message: "Insufficient balance"}
 76      TX_302: {code: "TX_302", message: "Nonce too low"}
 77      TX_303: {code: "TX_303", message: "Gas limit exceeded"}
 78      CONSENSUS_401: {code: "CONSENSUS_401", message: "Invalid block hash"}
 79      CONSENSUS_402: {code: "CONSENSUS_402", message: "Invalid validator signature"}
 80      NETWORK_501: {code: "NETWORK_501", message: "Peer connection failed"}
 81      INTERNAL_901: {code: "INTERNAL_901", message: "Database error"}
 82      INTERNAL_999: {code: "INTERNAL_999", message: "Unexpected error"}
 83  
 84  # === ERROR RESPONSE FORMAT ===
 85  response_format:
 86    api:
 87      structure:
 88        error: Error code string
 89        message: Human-readable message
 90        details: Optional additional context
 91        request_id: For support reference
 92  
 93      example:
 94        error: "TX_301"
 95        message: "Insufficient balance"
 96        details:
 97          required: "1000000"
 98          available: "500000"
 99        request_id: "req_abc123"
100  
101    internal:
102      structure:
103        code: Error code
104        message: Message
105        source: Source error (if chained)
106        context: Key-value pairs
107        backtrace: Stack trace (debug only)
108  
109  # === LOGGING STANDARDS ===
110  logging:
111    framework: tracing (Rust)
112  
113    levels:
114      ERROR: System failures, requires attention
115      WARN: Recoverable issues, degraded state
116      INFO: Normal operations, milestones
117      DEBUG: Detailed flow, development use
118      TRACE: Very verbose, performance impact
119  
120    format:
121      structure: JSON
122      fields:
123        - timestamp: ISO8601 with timezone
124        - level: ERROR|WARN|INFO|DEBUG|TRACE
125        - target: Module path
126        - message: Log message
127        - span: Active spans (context)
128        - fields: Structured key-value data
129  
130    example:
131      timestamp: "2026-01-05T15:04:05.123Z"
132      level: "INFO"
133      target: "adnet::api::transactions"
134      message: "Transaction broadcast"
135      span:
136        request_id: "req_abc123"
137        remote_addr: "192.168.1.1"
138      fields:
139        txid: "tx_xyz789"
140        size_bytes: 256
141  
142  # === SPAN CONTEXT ===
143  spans:
144    purpose: Group related log entries
145    propagation: Across async boundaries
146  
147    required_spans:
148      request: API request handling
149      transaction: Transaction processing
150      block: Block processing
 151      consensus: Consensus round  # NOTE(review): no field list for this span under `fields` below - TODO define
152  
153    fields:
154      request:
155        - request_id
156        - method
157        - path
158        - remote_addr
159      transaction:
160        - txid
161        - sender
162        - type
163      block:
164        - height
165        - hash
166        - validator
167  
168  # === SENSITIVE DATA ===
169  sensitive_data:
170    never_log:
171      - Private keys
172      - Passwords
173      - API tokens
174      - Session tokens
175  
176    redact:
177      - Full addresses (show first/last 6 chars)
178      - IP addresses in public logs
179      - Account balances (aggregate only)
180  
181    redaction_format:
182      address: "ax1abc...xyz123"
183      ip: "192.168.x.x"
184      key: "[REDACTED]"
185  
186  # === LOG AGGREGATION ===
187  aggregation:
188    destinations:
189      - Local file: /var/log/adnet/
190      - Stdout: For container environments
191      - SIEM: Forward for security analysis
192  
193    retention:
194      hot: 7 days (fast search)
195      warm: 30 days (slower search)
196      cold: 90 days (archive)
197  
198    rotation:
199      size: 100MB per file
200      count: 10 files
201      compression: gzip
202  
203  # === ERROR RECOVERY ===
204  recovery:
205    patterns:
206      retry:
207        use_for: [network_errors, transient_failures]
208        strategy: Exponential backoff
209        max_attempts: 5
210        base_delay: 100ms
211        max_delay: 30s
212  
213      circuit_breaker:
214        use_for: [external_service_calls]
215        failure_threshold: 5
216        reset_timeout: 60s
217  
218      fallback:
219        use_for: [non-critical_features]
220        behavior: Degrade gracefully
221  
222      panic_handling:
223        catch: At service boundaries
224        action: Log, restart service
225        never_catch: In consensus code
226  
227  # === DEBUGGING SUPPORT ===
228  debugging:
229    request_id:
230      generation: UUID v4
231      header: X-Request-ID
232      propagation: All internal calls
233  
234    debug_bundle:
235      contents:
236        - Recent logs (last 1000 lines)
237        - System info (uname, memory, disk)
238        - Configuration (redacted)
239        - Stack traces
240      command: adnet diagnostics bundle
241      output: debug-bundle-{timestamp}.tar.gz
242  
243    performance:
244      metrics: Response time per endpoint
 245      slow_query_log: ">1s queries"
246      profiling: Optional CPU/memory profiling