/ abzu-chameleon / src / histogram_presets.rs
histogram_presets.rs
  1  //! Histogram Presets for Cold-Start Seeding
  2  //!
  3  //! SECURITY: New nodes starting with identical empty histograms create a 
  4  //! "cold start monoculture" fingerprint. By shipping 5 distinct preset
  5  //! profiles and selecting deterministically based on node identity, we
  6  //! eliminate this vulnerability.
  7  //!
  8  //! Each preset represents a plausible traffic pattern learned over time:
  9  //! - Preset 0: "Night Owl" - Longer delays, smaller packets
 10  //! - Preset 1: "Active User" - Short delays, mixed sizes
 11  //! - Preset 2: "Casual Browser" - Medium delays, medium packets
 12  //! - Preset 3: "Data Syncer" - Bursty large packets
 13  //! - Preset 4: "Background App" - Long delays, small keep-alives
 14  
 15  /// Number of delay histogram buckets (10ms each, 0-640ms)
 16  pub const DELAY_BUCKETS: usize = 64;
 17  /// Number of size buckets
 18  pub const SIZE_BUCKETS: usize = 6;
 19  
 20  /// A histogram preset containing pre-seeded distributions
 21  #[derive(Debug, Clone)]
 22  pub struct HistogramPreset {
 23      pub name: &'static str,
 24      pub delay_histogram: [u32; DELAY_BUCKETS],
 25      pub size_histogram: [u32; SIZE_BUCKETS],
 26      /// Effective sample count to start with
 27      pub initial_samples: u64,
 28  }
 29  
/// Preset 0: "Night Owl" - Longer delays, smaller packets
/// Typical of background sync during low-activity periods
///
/// Delay weight climbs steadily to its maximum (40) in bucket 39 and tapers
/// off afterwards. The six size weights sum to 100 and shrink monotonically
/// from bucket 0 upward.
pub static PRESET_NIGHT_OWL: HistogramPreset = HistogramPreset {
    name: "night_owl",
    // Delay distribution: peak around 300-500ms (buckets 30-50)
    // Each full row below lists ten consecutive 10ms buckets.
    delay_histogram: [
        1, 1, 1, 1, 1, 2, 2, 2, 3, 3,  // 0-100ms: low
        3, 3, 4, 4, 4, 5, 5, 5, 6, 6,  // 100-200ms: building
        7, 7, 8, 8, 9, 10, 11, 12, 14, 16, // 200-300ms: moderate
        18, 20, 22, 25, 28, 30, 32, 35, 38, 40, // 300-400ms: peak
        38, 35, 32, 28, 25, 22, 18, 15, 12, 10, // 400-500ms: declining
        8, 6, 5, 4, 3, 3, 2, 2, 2, 1, // 500-600ms: tail
        1, 1, 1, 1, // 600-640ms: rare
    ],
    // Size distribution: mostly small packets (buckets 0-1 hold 70 of 100)
    size_histogram: [40, 30, 15, 10, 4, 1],
    // Effective sample count the histograms are seeded with
    initial_samples: 100,
};
 48  
/// Preset 1: "Active User" - Short delays, mixed sizes
/// Typical of interactive chat or browsing
///
/// The delay weights have two local maxima: 35 in bucket 2 and 22 in
/// bucket 16, followed by a long flat tail. The six size weights sum to 100
/// with the largest share (28) in bucket 2.
pub static PRESET_ACTIVE_USER: HistogramPreset = HistogramPreset {
    name: "active_user",
    // Delay distribution: bimodal - quick responses and thinking pauses
    // Each full row below lists ten consecutive 10ms buckets.
    delay_histogram: [
        25, 30, 35, 28, 22, 18, 15, 12, 10, 9, // 0-100ms: quick responses
        8, 10, 12, 15, 18, 20, 22, 18, 15, 12, // 100-200ms: secondary peak
        10, 8, 6, 5, 5, 4, 4, 3, 3, 3, // 200-300ms: declining
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 300-400ms: flat tail
        2, 2, 2, 2, 2, 2, 2, 2, 1, 1, // 400-500ms
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 500-600ms
        1, 1, 1, 1, // 600-640ms
    ],
    // Size distribution: mixed, slight preference for medium
    size_histogram: [20, 25, 28, 15, 8, 4],
    // Effective sample count the histograms are seeded with
    initial_samples: 100,
};
 67  
/// Preset 2: "Casual Browser" - Medium delays, medium packets
/// Typical web browsing pattern with read/click cycles
///
/// Delay weight rises to a single maximum (35) in bucket 17 and then decays
/// into a long tail. The six size weights sum to 100 with the largest share
/// (25) in bucket 3.
pub static PRESET_CASUAL_BROWSER: HistogramPreset = HistogramPreset {
    name: "casual_browser",
    // Delay distribution: broad peak around 150-250ms
    // Each full row below lists ten consecutive 10ms buckets.
    delay_histogram: [
        5, 6, 7, 8, 9, 10, 12, 14, 16, 18, // 0-100ms: building
        20, 22, 25, 28, 30, 32, 34, 35, 34, 32, // 100-200ms: peak
        30, 28, 25, 22, 20, 18, 16, 14, 12, 10, // 200-300ms: declining
        9, 8, 7, 6, 5, 5, 4, 4, 4, 3, // 300-400ms: tail
        3, 3, 3, 2, 2, 2, 2, 2, 2, 2, // 400-500ms
        2, 2, 1, 1, 1, 1, 1, 1, 1, 1, // 500-600ms
        1, 1, 1, 1, // 600-640ms
    ],
    // Size distribution: slight preference for larger responses
    size_histogram: [15, 18, 22, 25, 12, 8],
    // Effective sample count the histograms are seeded with
    initial_samples: 100,
};
 86  
/// Preset 3: "Data Syncer" - Bursty large packets
/// Typical of file sync or backup applications
///
/// The delay weights are two-humped: the maximum (35) is in bucket 0, the
/// weights then sit at a floor of 2 before rising to a second hump of 28 in
/// bucket 50. The six size weights sum to 100, with the two largest buckets
/// holding 47 of them.
pub static PRESET_DATA_SYNCER: HistogramPreset = HistogramPreset {
    name: "data_syncer",
    // Delay distribution: very short bursts with long pauses
    // Each full row below lists ten consecutive 10ms buckets.
    delay_histogram: [
        35, 30, 25, 20, 15, 12, 10, 8, 6, 5, // 0-100ms: burst mode
        4, 4, 3, 3, 3, 2, 2, 2, 2, 2, // 100-200ms
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 200-300ms: flat
        2, 2, 2, 2, 2, 3, 3, 3, 4, 4, // 300-400ms: starting to rise
        5, 6, 8, 10, 12, 15, 18, 20, 22, 25, // 400-500ms: inter-burst pause
        28, 25, 20, 15, 10, 8, 5, 3, 2, 2, // 500-600ms: declining
        2, 2, 1, 1, // 600-640ms
    ],
    // Size distribution: heavy on large packets
    size_histogram: [8, 10, 15, 20, 25, 22],
    // Effective sample count the histograms are seeded with
    initial_samples: 100,
};
105  
/// Preset 4: "Background App" - Long delays, small keep-alives
/// Typical of dormant apps sending heartbeats
///
/// Delay weight stays at 1-2 for the first 200ms, then climbs to its maximum
/// (45) in bucket 48 before falling away. The six size weights sum to 100,
/// with bucket 0 alone holding 55 of them.
pub static PRESET_BACKGROUND_APP: HistogramPreset = HistogramPreset {
    name: "background_app",
    // Delay distribution: heavily skewed to long delays
    // Each full row below lists ten consecutive 10ms buckets.
    delay_histogram: [
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0-100ms: rare
        1, 1, 1, 1, 1, 2, 2, 2, 2, 2, // 100-200ms
        2, 2, 3, 3, 3, 4, 4, 5, 5, 6, // 200-300ms: building
        7, 8, 10, 12, 14, 16, 18, 20, 22, 25, // 300-400ms: rising
        28, 30, 32, 35, 38, 40, 42, 44, 45, 44, // 400-500ms: plateau
        42, 38, 35, 30, 25, 20, 15, 10, 8, 5, // 500-600ms: declining
        4, 3, 2, 2, // 600-640ms: tail
    ],
    // Size distribution: mostly tiny keep-alive packets
    size_histogram: [55, 25, 10, 6, 3, 1],
    // Effective sample count the histograms are seeded with
    initial_samples: 100,
};
124  
/// All available presets for deterministic selection
///
/// `select_preset` indexes this table with `selector % ALL_PRESETS.len()`,
/// so both the order and the number of entries decide which preset a given
/// identity receives. Reordering, inserting, or removing entries changes the
/// mapping for identities that were already assigned a preset.
pub static ALL_PRESETS: [&HistogramPreset; 5] = [
    &PRESET_NIGHT_OWL,
    &PRESET_ACTIVE_USER,
    &PRESET_CASUAL_BROWSER,
    &PRESET_DATA_SYNCER,
    &PRESET_BACKGROUND_APP,
];
133  
134  /// Select a preset deterministically based on node identity hash.
135  ///
136  /// SECURITY: Same identity = same preset forever, preventing the
137  /// "cold start monoculture" fingerprint.
138  ///
139  /// # Arguments
140  /// * `identity_hash` - First 32 bytes of node identity
141  pub fn select_preset(identity_hash: &[u8; 32]) -> &'static HistogramPreset {
142      // Use bytes 16-24 to avoid collision with TLS profile selection (0-16)
143      let selector = u64::from_le_bytes(identity_hash[16..24].try_into().unwrap());
144      let index = (selector as usize) % ALL_PRESETS.len();
145      ALL_PRESETS[index]
146  }
147  
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an identity whose selector window (bytes 16..24, little-endian)
    /// encodes `selector`; every other byte is zero.
    fn identity_with_selector(selector: u64) -> [u8; 32] {
        let mut identity = [0u8; 32];
        identity[16..24].copy_from_slice(&selector.to_le_bytes());
        identity
    }

    #[test]
    fn test_preset_selection_deterministic() {
        let identity1 = [0x42u8; 32];
        let identity2 = [0x42u8; 32];

        let preset1 = select_preset(&identity1);
        let preset2 = select_preset(&identity2);

        assert_eq!(preset1.name, preset2.name, "Same identity = same preset");
    }

    #[test]
    fn test_every_preset_is_reachable() {
        // Selector values below the table length map straight onto table
        // indices, so each preset must be returned for its own index.
        for (index, expected) in ALL_PRESETS.iter().enumerate() {
            let identity = identity_with_selector(index as u64);
            assert_eq!(
                select_preset(&identity).name,
                expected.name,
                "Selector {} should pick preset {}",
                index,
                expected.name
            );
        }
    }

    #[test]
    fn test_selection_uses_only_selector_window() {
        let mut identity = identity_with_selector(3);
        let baseline = select_preset(&identity).name;

        // Flip every byte outside 16..24; the choice must not move.
        for i in (0..16).chain(24..32) {
            identity[i] = 0xFF;
        }

        assert_eq!(
            select_preset(&identity).name,
            baseline,
            "Bytes outside 16..24 must not affect selection"
        );
    }

    #[test]
    fn test_all_presets_valid() {
        for preset in ALL_PRESETS.iter() {
            // Verify histograms are properly sized
            assert_eq!(preset.delay_histogram.len(), DELAY_BUCKETS);
            assert_eq!(preset.size_histogram.len(), SIZE_BUCKETS);

            // Verify non-zero totals
            let delay_sum: u32 = preset.delay_histogram.iter().sum();
            let size_sum: u32 = preset.size_histogram.iter().sum();
            assert!(delay_sum > 0, "Delay histogram should have weight");
            assert!(size_sum > 0, "Size histogram should have weight");

            // Verify initial samples set
            assert!(preset.initial_samples >= 50, "Should seed with enough samples");
        }
    }

    #[test]
    fn test_presets_are_distinct() {
        // Compare every unordered pair once: names must be unique and the
        // seeded distributions themselves must differ, otherwise two presets
        // would produce the same cold-start fingerprint.
        for (i, preset) in ALL_PRESETS.iter().enumerate() {
            for other in ALL_PRESETS.iter().skip(i + 1) {
                assert_ne!(preset.name, other.name, "Presets should have unique names");
                assert_ne!(
                    preset.delay_histogram,
                    other.delay_histogram,
                    "Presets {} and {} share a delay histogram",
                    preset.name,
                    other.name
                );
                assert_ne!(
                    preset.size_histogram,
                    other.size_histogram,
                    "Presets {} and {} share a size histogram",
                    preset.name,
                    other.name
                );
            }
        }
    }
}