headerssync-params.py
1 #!/usr/bin/env python3 2 # Copyright (c) 2022 Pieter Wuille 3 # Distributed under the MIT software license, see the accompanying 4 # file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 """Script to find the optimal parameters for the headerssync module through simulation.""" 7 8 from datetime import datetime, timedelta 9 from math import log, exp, sqrt 10 import random 11 12 # Parameters: 13 14 # Aim for still working fine at some point in the future. [datetime] 15 TIME = datetime(2028, 10, 10) 16 17 # Expected block interval. [timedelta] 18 BLOCK_INTERVAL = timedelta(seconds=600) 19 20 # The number of headers corresponding to the minchainwork parameter. [headers] 21 MINCHAINWORK_HEADERS = 938343 22 23 # Combined processing bandwidth from all attackers to one victim. [bit/s] 24 # 6 Gbit/s is approximately the speed at which a single thread of a Ryzen 5950X CPU thread can hash 25 # headers. In practice, the victim's network bandwidth and network processing overheads probably 26 # impose a far lower number, but it's a useful upper bound. 27 ATTACK_BANDWIDTH = 6000000000 28 29 # How much additional permanent memory usage are attackers (jointly) allowed to cause in the victim, 30 # expressed as fraction of the normal memory usage due to mainchain growth, for the duration the 31 # attack is sustained. [unitless] 32 # 0.2 means that attackers, while they keep up the attack, can cause permanent memory usage due to 33 # headers storage to grow at 1.2 header per BLOCK_INTERVAL. 34 ATTACK_FRACTION = 0.2 35 36 # When this is set, the mapping from period size to memory usage (at optimal buffer size for that 37 # period) is assumed to be convex. This greatly speeds up the computation, and does not appear 38 # to influence the outcome. Set to False for a stronger guarantee to get the optimal result. 39 ASSUME_CONVEX = True 40 41 # Explanation: 42 # 43 # The headerssync module implements a DoS protection against low-difficulty header spam which does 44 # not rely on checkpoints. In short it works as follows: 45 # 46 # - (initial) header synchronization is split into two phases: 47 # - A commitment phase, in which headers are downloaded from the peer, and a very compact 48 # commitment to them is remembered in per-peer memory. The commitment phase ends when the 49 # received chain's combined work reaches a predetermined threshold. 50 # - A redownload phase, during which the headers are downloaded a second time from the same peer, 51 # and compared against the commitment constructed in the first phase. If there is a match, the 52 # redownloaded headers are fed to validation and accepted into permanent storage. 53 # 54 # This separation guarantees that no headers are accepted into permanent storage without 55 # requiring the peer to first prove the chain actually has sufficient work. 56 # 57 # - To actually implement this commitment mechanism, the following approach is used: 58 # - Keep a *1 bit* commitment (constructed using a salted hash function), for every block whose 59 # height is a multiple of {period} plus an offset value. If RANDOMIZE_OFFSET, the offset, 60 # like the salt, is chosen randomly when the synchronization starts and kept fixed afterwards. 61 # - When redownloading, headers are fed through a per-peer queue that holds {bufsize} headers, 62 # before passing them to validation. All the headers in this queue are verified against the 63 # commitment bits created in the first phase before any header is released from it. This means 64 # {bufsize/period} bits are checked "on top of" each header before actually processing it, 65 # which results in a commitment structure with roughly {bufsize/period} bits of security, as 66 # once a header is modified, due to the prevhash inclusion, all future headers necessarily 67 # change as well. 68 # 69 # The question is what these {period} and {bufsize} parameters need to be set to. This program 70 # exhaustively tests a range of values to find the optimal choice, taking into account: 71 # 72 # - Minimizing the (maximum of) two scenarios that trigger per-peer memory usage: 73 # 74 # - When downloading a (likely honest) chain that reaches the chainwork threshold after {n} 75 # blocks, and then redownloads them, we will consume per-peer memory that is sufficient to 76 # store {n/period} commitment bits and {bufsize} headers. We only consider attackers without 77 # sufficient hashpower (as otherwise they are from a PoW perspective not attackers), which 78 # means {n} is restricted to the honest chain's length before reaching minchainwork. 79 # 80 # - When downloading a (likely false) chain of {n} headers that never reaches the chainwork 81 # threshold, we will consume per-peer memory that is sufficient to store {n/period} 82 # commitment bits. Such a chain may be very long, by exploiting the timewarp bug to avoid 83 # ramping up difficulty. There is however an absolute limit on how long such a chain can be: 6 84 # blocks per second since genesis, due to the increasing MTP consensus rule. 85 # 86 # - Not gratuitously preventing synchronizing any valid chain, however difficult such a chain may 87 # be to construct. In particular, the above scenario with an enormous timewarp-expoiting chain 88 # cannot simply be ignored, as it is legal that the honest main chain is like that. We however 89 # do not bother minimizing the memory usage in that case (because a billion-header long honest 90 # chain will inevitably use far larger amounts of memory than designed for). 91 # 92 # - Keep the rate at which attackers can get low-difficulty headers accepted to the block index 93 # negligible. Specifically, the possibility exists for an attacker to send the honest main 94 # chain's headers during the commitment phase, but then start deviating at an attacker-chosen 95 # point by sending novel low-difficulty headers instead. Depending on how high we set the 96 # {bufsize/period} ratio, we can make the probability that such a header makes it in 97 # arbitrarily small, but at the cost of higher memory during the redownload phase. It turns out, 98 # some rate of memory usage growth is expected anyway due to chain growth, so permitting the 99 # attacker to increase that rate by a small factor isn't concerning. The attacker may start 100 # somewhat later than genesis, as long as the difficulty doesn't get too high. This reduces 101 # the attacker bandwidth required at the cost of higher PoW needed for constructing the 102 # alternate chain. This trade-off is ignored here, as it results in at most a small constant 103 # factor in attack rate. 104 105 106 # System properties: 107 108 # Headers in the redownload buffer are stored without prevhash. [bits] 109 COMPACT_HEADER_SIZE = 48 * 8 110 111 # How many bits a header uses in P2P protocol. [bits] 112 NET_HEADER_SIZE = 81 * 8 113 114 # How many headers are sent at once. [headers] 115 HEADER_BATCH_COUNT = 2000 116 117 # Whether or not the offset of which blocks heights get checksummed is randomized. 118 RANDOMIZE_OFFSET = True 119 120 # Timestamp of the genesis block 121 GENESIS_TIME = datetime(2009, 1, 3) 122 123 # Derived values: 124 125 # What rate of headers worth of RAM attackers are allowed to cause in the victim. [headers/s] 126 LIMIT_HEADERRATE = ATTACK_FRACTION / BLOCK_INTERVAL.total_seconds() 127 128 # How many headers can attackers (jointly) send a victim per second. [headers/s] 129 NET_HEADERRATE = ATTACK_BANDWIDTH / NET_HEADER_SIZE 130 131 # What fraction of headers sent by attackers can at most be accepted by a victim [unitless] 132 LIMIT_FRACTION = LIMIT_HEADERRATE / NET_HEADERRATE 133 134 # How many headers we permit attackers to cause being accepted per attack. [headers/attack] 135 ATTACK_HEADERS = LIMIT_FRACTION * MINCHAINWORK_HEADERS 136 137 138 def find_max_headers(when): 139 """Compute the maximum number of headers a valid Bitcoin chain can have at given time.""" 140 # When exploiting the timewarp attack, this can be up to 6 per second since genesis. 141 return 6 * ((when - GENESIS_TIME) // timedelta(seconds=1)) 142 143 144 def lambert_w(value): 145 """Solve the equation x*exp(x)=value (x > 0, value > 0).""" 146 # Initial approximation. 147 approx = max(log(value), 0.0) 148 for _ in range(10): 149 # Newton-Rhapson iteration steps. 150 approx += (value * exp(-approx) - approx) / (approx + 1.0) 151 return approx 152 153 154 def attack_rate(period, bufsize, limit=None): 155 """Compute maximal accepted headers per attack in (period, bufsize) configuration. 156 157 If limit is provided, the computation is stopped early when the result is known to exceed the 158 value in limit. 159 """ 160 161 max_rate = None 162 max_honest = None 163 # Let the current batch 0 being received be the first one in which the attacker starts lying. 164 # They will only ever start doing so right after a commitment block, but where that is can be 165 # in a number of places. Let honest be the number of honest headers in this current batch, 166 # preceding the forged ones. 167 for honest in range(HEADER_BATCH_COUNT): 168 # The number of headers the attack under consideration will on average get accepted. 169 # This is the number being computed. 170 rate = 0 171 172 # Iterate over the possible alignments of commitments w.r.t. the first batch. In case 173 # the alignments are randomized, try all values. If not, the attacker can know/choose 174 # the alignment, and will always start forging right after a commitment. 175 if RANDOMIZE_OFFSET: 176 align_choices = list(range(period)) 177 else: 178 align_choices = [(honest - 1) % period] 179 # Now loop over those possible alignment values, computing the average attack rate 180 # over them by dividing each contribution by len(align_choices). 181 for align in align_choices: 182 # These state variables capture the situation after receiving the first batch. 183 # - The number of headers received after the last commitment for an honest block: 184 after_good_commit = HEADER_BATCH_COUNT - honest + ((honest - align - 1) % period) 185 # - The number of forged headers in the redownload buffer: 186 forged_in_buf = HEADER_BATCH_COUNT - honest 187 188 # Now iterate over the next batches of headers received, adding contributions to the 189 # rate variable. 190 while True: 191 # Process the first HEADER_BATCH_COUNT headers in the buffer: 192 accept_forged_headers = max(forged_in_buf - bufsize, 0) 193 forged_in_buf -= accept_forged_headers 194 if accept_forged_headers: 195 # The probability the attack has not been detected yet at this point: 196 prob = 0.5 ** (after_good_commit // period) 197 # Update attack rate, divided by align_choices to average over the alignments. 198 rate += accept_forged_headers * prob / len(align_choices) 199 # If this means we exceed limit, bail out early (performance optimization). 200 if limit is not None and rate >= limit: 201 return rate, None 202 # If the maximal term being added is negligible compared to rate, stop 203 # iterating. 204 if HEADER_BATCH_COUNT * prob < 1.0e-16 * rate * len(align_choices): 205 break 206 # Update state from a new incoming batch (which is all forged) 207 after_good_commit += HEADER_BATCH_COUNT 208 forged_in_buf += HEADER_BATCH_COUNT 209 210 if max_rate is None or rate > max_rate: 211 max_rate = rate 212 max_honest = honest 213 214 return max_rate, max_honest 215 216 217 def memory_usage(period, bufsize, when): 218 """How much memory (max,mainchain,timewarp) does the (period,bufsize) configuration need?""" 219 220 # Per-peer memory usage for a timewarp chain that never meets minchainwork 221 mem_timewarp = find_max_headers(when) // period 222 # Per-peer memory usage for being fed the main chain 223 mem_mainchain = (MINCHAINWORK_HEADERS // period) + bufsize * COMPACT_HEADER_SIZE 224 # Maximum per-peer memory usage 225 max_mem = max(mem_timewarp, mem_mainchain) 226 227 return max_mem, mem_mainchain, mem_timewarp 228 229 def find_bufsize(period, attack_headers, when, max_mem=None, min_bufsize=1): 230 """Determine how big bufsize needs to be given a specific period length. 231 232 Given a period, find the smallest value of bufsize such that the attack rate against the 233 (period, bufsize) configuration is below attack_headers. If max_mem is provided, and no 234 such bufsize exists that needs less than max_mem bits of memory, None is returned. 235 min_bufsize is the minimal result to be considered.""" 236 237 if max_mem is None: 238 succ_buf = min_bufsize - 1 239 fail_buf = min_bufsize 240 # First double iteratively until an upper bound for failure is found. 241 while True: 242 if attack_rate(period, fail_buf, attack_headers)[0] < attack_headers: 243 break 244 succ_buf, fail_buf = fail_buf, 3 * fail_buf - 2 * succ_buf 245 else: 246 # If a long low-work header chain exists that exceeds max_mem already, give up. 247 if find_max_headers(when) // period > max_mem: 248 return None 249 # Otherwise, verify that the maximal buffer size that permits a mainchain sync with less 250 # than max_mem memory is sufficient to get the attack rate below attack_headers. If not, 251 # also give up. 252 max_buf = (max_mem - (MINCHAINWORK_HEADERS // period)) // COMPACT_HEADER_SIZE 253 if max_buf < min_bufsize: 254 return None 255 if attack_rate(period, max_buf, attack_headers)[0] >= attack_headers: 256 return None 257 # If it is sufficient, that's an upper bound to start our search. 258 succ_buf = min_bufsize - 1 259 fail_buf = max_buf 260 261 # Then perform a bisection search to narrow it down. 262 while fail_buf > succ_buf + 1: 263 try_buf = (succ_buf + fail_buf) // 2 264 if attack_rate(period, try_buf, attack_headers)[0] >= attack_headers: 265 succ_buf = try_buf 266 else: 267 fail_buf = try_buf 268 return fail_buf 269 270 271 def optimize(when): 272 """Find the best (period, bufsize) configuration.""" 273 274 # When period*bufsize = memory_scale, the per-peer memory for a mainchain sync and a maximally 275 # long low-difficulty header sync are equal. 276 memory_scale = (find_max_headers(when) - MINCHAINWORK_HEADERS) / COMPACT_HEADER_SIZE 277 # Compute approximation for {bufsize/period}, using a formula for a simplified problem. 278 approx_ratio = lambert_w(log(4) * memory_scale / ATTACK_HEADERS**2) / log(4) 279 # Use those for a first attempt. 280 print("Searching configurations:") 281 period = int(sqrt(memory_scale / approx_ratio) + 0.5) 282 bufsize = find_bufsize(period, ATTACK_HEADERS, when) 283 mem = memory_usage(period, bufsize, when) 284 best = (period, bufsize, mem) 285 maps = [(period, bufsize), (MINCHAINWORK_HEADERS + 1, None)] 286 print(f"- Initial: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB") 287 288 # Consider all period values between 1 and MINCHAINWORK_HEADERS, except the one just tried. 289 periods = [iv for iv in range(1, MINCHAINWORK_HEADERS + 1) if iv != period] 290 # Iterate, picking a random element from periods, computing its corresponding bufsize, and 291 # then using the result to shrink the period. 292 while True: 293 # Remove all periods whose memory usage for low-work long chain sync exceed the best 294 # memory usage we've found so far. 295 periods = [p for p in periods if find_max_headers(when) // p < best[2][0]] 296 # Stop if there is nothing left to try. 297 if len(periods) == 0: 298 break 299 # Pick a random remaining option for period size, and compute corresponding bufsize. 300 period = periods.pop(random.randrange(len(periods))) 301 # The buffer size (at a given attack level) cannot shrink as the period grows. Find the 302 # largest period smaller than the selected one we know the buffer size for, and use that 303 # as a lower bound to find_bufsize. 304 min_bufsize = max([(p, b) for p, b in maps if p < period] + [(0,0)])[1] 305 bufsize = find_bufsize(period, ATTACK_HEADERS, when, best[2][0], min_bufsize) 306 if bufsize is not None: 307 # We found a (period, bufsize) configuration with better memory usage than our best 308 # so far. Remember it for future lower bounds. 309 maps.append((period, bufsize)) 310 mem = memory_usage(period, bufsize, when) 311 assert mem[0] <= best[2][0] 312 if ASSUME_CONVEX: 313 # Remove all periods that are on the other side of the former best as the new 314 # best. 315 periods = [p for p in periods if (p < best[0]) == (period < best[0])] 316 best = (period, bufsize, mem) 317 print(f"- New best: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB") 318 else: 319 # The (period, bufsize) configuration we found is worse than what we already had. 320 if ASSUME_CONVEX: 321 # Remove all periods that are on the other side of the tried configuration as the 322 # best one. 323 periods = [p for p in periods if (p < period) == (best[0] < period)] 324 325 # Return the result. 326 period, bufsize, _ = best 327 return period, bufsize 328 329 330 def analyze(when): 331 """Find the best configuration and print it out.""" 332 333 period, bufsize = optimize(when) 334 # Compute accurate statistics for the best found configuration. 335 _, mem_mainchain, mem_timewarp = memory_usage(period, bufsize, when) 336 headers_per_attack, _ = attack_rate(period, bufsize) 337 attack_volume = NET_HEADER_SIZE * MINCHAINWORK_HEADERS 338 # And report them. 339 print() 340 print(f"Given current min chainwork headers of {MINCHAINWORK_HEADERS}, the optimal parameters for low") 341 print(f"memory usage on mainchain for release until {TIME:%Y-%m-%d} is:") 342 print() 343 print(f" // Generated by headerssync-params.py on {datetime.today():%Y-%m-%d}.") 344 print( " m_headers_sync_params = HeadersSyncParams{") 345 print(f" .commitment_period = {period},") 346 print(f" .redownload_buffer_size = {bufsize}," 347 f" // {bufsize}/{period} = ~{bufsize/period:.1f} commitments") 348 print( " };") 349 print() 350 print("Properties:") 351 print(f"- Per-peer memory for mainchain sync: {mem_mainchain / 8192:.3f} KiB") 352 print(f"- Per-peer memory for timewarp attack: {mem_timewarp / 8192:.3f} KiB") 353 print(f"- Attack rate: {1/headers_per_attack:.1f} attacks for 1 header of memory growth") 354 print(f" (where each attack costs {attack_volume / 8388608:.3f} MiB bandwidth)") 355 356 357 analyze(TIME)