/ contrib / devtools / headerssync-params.py
headerssync-params.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2022 Pieter Wuille
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  """Script to find the optimal parameters for the headerssync module through simulation."""
  7  
  8  from datetime import datetime, timedelta
  9  from math import log, exp, sqrt
 10  import random
 11  
 12  # Parameters:
 13  
 14  # Aim for still working fine at some point in the future. [datetime]
 15  TIME = datetime(2028, 10, 10)
 16  
 17  # Expected block interval. [timedelta]
 18  BLOCK_INTERVAL = timedelta(seconds=600)
 19  
 20  # The number of headers corresponding to the minchainwork parameter. [headers]
 21  MINCHAINWORK_HEADERS = 938343
 22  
 23  # Combined processing bandwidth from all attackers to one victim. [bit/s]
 24  # 6 Gbit/s is approximately the speed at which a single thread of a Ryzen 5950X CPU thread can hash
 25  # headers. In practice, the victim's network bandwidth and network processing overheads probably
 26  # impose a far lower number, but it's a useful upper bound.
 27  ATTACK_BANDWIDTH = 6000000000
 28  
 29  # How much additional permanent memory usage are attackers (jointly) allowed to cause in the victim,
 30  # expressed as fraction of the normal memory usage due to mainchain growth, for the duration the
 31  # attack is sustained. [unitless]
 32  # 0.2 means that attackers, while they keep up the attack, can cause permanent memory usage due to
 33  # headers storage to grow at 1.2 header per BLOCK_INTERVAL.
 34  ATTACK_FRACTION = 0.2
 35  
 36  # When this is set, the mapping from period size to memory usage (at optimal buffer size for that
 37  # period) is assumed to be convex. This greatly speeds up the computation, and does not appear
 38  # to influence the outcome. Set to False for a stronger guarantee to get the optimal result.
 39  ASSUME_CONVEX = True
 40  
 41  # Explanation:
 42  #
 43  #  The headerssync module implements a DoS protection against low-difficulty header spam which does
 44  #  not rely on checkpoints. In short it works as follows:
 45  #
 46  #  - (initial) header synchronization is split into two phases:
 47  #    - A commitment phase, in which headers are downloaded from the peer, and a very compact
 48  #      commitment to them is remembered in per-peer memory. The commitment phase ends when the
 49  #      received chain's combined work reaches a predetermined threshold.
 50  #    - A redownload phase, during which the headers are downloaded a second time from the same peer,
 51  #      and compared against the commitment constructed in the first phase. If there is a match, the
 52  #      redownloaded headers are fed to validation and accepted into permanent storage.
 53  #
 54  #    This separation guarantees that no headers are accepted into permanent storage without
 55  #    requiring the peer to first prove the chain actually has sufficient work.
 56  #
 57  #  - To actually implement this commitment mechanism, the following approach is used:
 58  #    - Keep a *1 bit* commitment (constructed using a salted hash function), for every block whose
 59  #      height is a multiple of {period} plus an offset value. If RANDOMIZE_OFFSET, the offset,
 60  #      like the salt, is chosen randomly when the synchronization starts and kept fixed afterwards.
 61  #    - When redownloading, headers are fed through a per-peer queue that holds {bufsize} headers,
 62  #      before passing them to validation. All the headers in this queue are verified against the
 63  #      commitment bits created in the first phase before any header is released from it. This means
 64  #      {bufsize/period} bits are checked "on top of" each header before actually processing it,
 65  #      which results in a commitment structure with roughly {bufsize/period} bits of security, as
 66  #      once a header is modified, due to the prevhash inclusion, all future headers necessarily
 67  #      change as well.
 68  #
 69  #  The question is what these {period} and {bufsize} parameters need to be set to. This program
 70  #  exhaustively tests a range of values to find the optimal choice, taking into account:
 71  #
 72  #  - Minimizing the (maximum of) two scenarios that trigger per-peer memory usage:
 73  #
 74  #    - When downloading a (likely honest) chain that reaches the chainwork threshold after {n}
 75  #      blocks, and then redownloads them, we will consume per-peer memory that is sufficient to
 76  #      store {n/period} commitment bits and {bufsize} headers. We only consider attackers without
 77  #      sufficient hashpower (as otherwise they are from a PoW perspective not attackers), which
 78  #      means {n} is restricted to the honest chain's length before reaching minchainwork.
 79  #
 80  #    - When downloading a (likely false) chain of {n} headers that never reaches the chainwork
 81  #      threshold, we will consume per-peer memory that is sufficient to store {n/period}
 82  #      commitment bits. Such a chain may be very long, by exploiting the timewarp bug to avoid
 83  #      ramping up difficulty. There is however an absolute limit on how long such a chain can be: 6
 84  #      blocks per second since genesis, due to the increasing MTP consensus rule.
 85  #
 86  #  - Not gratuitously preventing synchronizing any valid chain, however difficult such a chain may
 87  #    be to construct. In particular, the above scenario with an enormous timewarp-expoiting chain
 88  #    cannot simply be ignored, as it is legal that the honest main chain is like that. We however
 89  #    do not bother minimizing the memory usage in that case (because a billion-header long honest
 90  #    chain will inevitably use far larger amounts of memory than designed for).
 91  #
 92  #  - Keep the rate at which attackers can get low-difficulty headers accepted to the block index
 93  #    negligible. Specifically, the possibility exists for an attacker to send the honest main
 94  #    chain's headers during the commitment phase, but then start deviating at an attacker-chosen
 95  #    point by sending novel low-difficulty headers instead. Depending on how high we set the
 96  #    {bufsize/period} ratio, we can make the probability that such a header makes it in
 97  #    arbitrarily small, but at the cost of higher memory during the redownload phase. It turns out,
 98  #    some rate of memory usage growth is expected anyway due to chain growth, so permitting the
 99  #    attacker to increase that rate by a small factor isn't concerning. The attacker may start
100  #    somewhat later than genesis, as long as the difficulty doesn't get too high. This reduces
101  #    the attacker bandwidth required at the cost of higher PoW needed for constructing the
102  #    alternate chain. This trade-off is ignored here, as it results in at most a small constant
103  #    factor in attack rate.
104  
105  
106  # System properties:
107  
108  # Headers in the redownload buffer are stored without prevhash. [bits]
109  COMPACT_HEADER_SIZE = 48 * 8
110  
111  # How many bits a header uses in P2P protocol. [bits]
112  NET_HEADER_SIZE = 81 * 8
113  
114  # How many headers are sent at once. [headers]
115  HEADER_BATCH_COUNT = 2000
116  
117  # Whether or not the offset of which blocks heights get checksummed is randomized.
118  RANDOMIZE_OFFSET = True
119  
120  # Timestamp of the genesis block
121  GENESIS_TIME = datetime(2009, 1, 3)
122  
123  # Derived values:
124  
125  # What rate of headers worth of RAM attackers are allowed to cause in the victim. [headers/s]
126  LIMIT_HEADERRATE = ATTACK_FRACTION / BLOCK_INTERVAL.total_seconds()
127  
128  # How many headers can attackers (jointly) send a victim per second. [headers/s]
129  NET_HEADERRATE = ATTACK_BANDWIDTH / NET_HEADER_SIZE
130  
131  # What fraction of headers sent by attackers can at most be accepted by a victim [unitless]
132  LIMIT_FRACTION = LIMIT_HEADERRATE / NET_HEADERRATE
133  
134  # How many headers we permit attackers to cause being accepted per attack. [headers/attack]
135  ATTACK_HEADERS = LIMIT_FRACTION * MINCHAINWORK_HEADERS
136  
137  
138  def find_max_headers(when):
139      """Compute the maximum number of headers a valid Bitcoin chain can have at given time."""
140      # When exploiting the timewarp attack, this can be up to 6 per second since genesis.
141      return 6 * ((when - GENESIS_TIME) // timedelta(seconds=1))
142  
143  
144  def lambert_w(value):
145      """Solve the equation x*exp(x)=value (x > 0, value > 0)."""
146      # Initial approximation.
147      approx = max(log(value), 0.0)
148      for _ in range(10):
149          # Newton-Rhapson iteration steps.
150          approx += (value * exp(-approx) - approx) / (approx + 1.0)
151      return approx
152  
153  
154  def attack_rate(period, bufsize, limit=None):
155      """Compute maximal accepted headers per attack in (period, bufsize) configuration.
156  
157      If limit is provided, the computation is stopped early when the result is known to exceed the
158      value in limit.
159      """
160  
161      max_rate = None
162      max_honest = None
163      # Let the current batch 0 being received be the first one in which the attacker starts lying.
164      # They will only ever start doing so right after a commitment block, but where that is can be
165      # in a number of places. Let honest be the number of honest headers in this current batch,
166      # preceding the forged ones.
167      for honest in range(HEADER_BATCH_COUNT):
168          # The number of headers the attack under consideration will on average get accepted.
169          # This is the number being computed.
170          rate = 0
171  
172          # Iterate over the possible alignments of commitments w.r.t. the first batch. In case
173          # the alignments are randomized, try all values. If not, the attacker can know/choose
174          # the alignment, and will always start forging right after a commitment.
175          if RANDOMIZE_OFFSET:
176              align_choices = list(range(period))
177          else:
178              align_choices = [(honest - 1) % period]
179          # Now loop over those possible alignment values, computing the average attack rate
180          # over them by dividing each contribution by len(align_choices).
181          for align in align_choices:
182              # These state variables capture the situation after receiving the first batch.
183              # - The number of headers received after the last commitment for an honest block:
184              after_good_commit = HEADER_BATCH_COUNT - honest + ((honest - align - 1) % period)
185              # - The number of forged headers in the redownload buffer:
186              forged_in_buf = HEADER_BATCH_COUNT - honest
187  
188              # Now iterate over the next batches of headers received, adding contributions to the
189              # rate variable.
190              while True:
191                  # Process the first HEADER_BATCH_COUNT headers in the buffer:
192                  accept_forged_headers = max(forged_in_buf - bufsize, 0)
193                  forged_in_buf -= accept_forged_headers
194                  if accept_forged_headers:
195                      # The probability the attack has not been detected yet at this point:
196                      prob = 0.5 ** (after_good_commit // period)
197                      # Update attack rate, divided by align_choices to average over the alignments.
198                      rate += accept_forged_headers * prob / len(align_choices)
199                      # If this means we exceed limit, bail out early (performance optimization).
200                      if limit is not None and rate >= limit:
201                          return rate, None
202                      # If the maximal term being added is negligible compared to rate, stop
203                      # iterating.
204                      if HEADER_BATCH_COUNT * prob < 1.0e-16 * rate * len(align_choices):
205                          break
206                  # Update state from a new incoming batch (which is all forged)
207                  after_good_commit += HEADER_BATCH_COUNT
208                  forged_in_buf += HEADER_BATCH_COUNT
209  
210          if max_rate is None or rate > max_rate:
211              max_rate = rate
212              max_honest = honest
213  
214      return max_rate, max_honest
215  
216  
217  def memory_usage(period, bufsize, when):
218      """How much memory (max,mainchain,timewarp) does the (period,bufsize) configuration need?"""
219  
220      # Per-peer memory usage for a timewarp chain that never meets minchainwork
221      mem_timewarp = find_max_headers(when) // period
222      # Per-peer memory usage for being fed the main chain
223      mem_mainchain = (MINCHAINWORK_HEADERS // period) + bufsize * COMPACT_HEADER_SIZE
224      # Maximum per-peer memory usage
225      max_mem = max(mem_timewarp, mem_mainchain)
226  
227      return max_mem, mem_mainchain, mem_timewarp
228  
229  def find_bufsize(period, attack_headers, when, max_mem=None, min_bufsize=1):
230      """Determine how big bufsize needs to be given a specific period length.
231  
232      Given a period, find the smallest value of bufsize such that the attack rate against the
233      (period, bufsize) configuration is below attack_headers. If max_mem is provided, and no
234      such bufsize exists that needs less than max_mem bits of memory, None is returned.
235      min_bufsize is the minimal result to be considered."""
236  
237      if max_mem is None:
238          succ_buf = min_bufsize - 1
239          fail_buf = min_bufsize
240          # First double iteratively until an upper bound for failure is found.
241          while True:
242              if attack_rate(period, fail_buf, attack_headers)[0] < attack_headers:
243                  break
244              succ_buf, fail_buf = fail_buf, 3 * fail_buf - 2 * succ_buf
245      else:
246          # If a long low-work header chain exists that exceeds max_mem already, give up.
247          if find_max_headers(when) // period > max_mem:
248              return None
249          # Otherwise, verify that the maximal buffer size that permits a mainchain sync with less
250          # than max_mem memory is sufficient to get the attack rate below attack_headers. If not,
251          # also give up.
252          max_buf = (max_mem - (MINCHAINWORK_HEADERS // period)) // COMPACT_HEADER_SIZE
253          if max_buf < min_bufsize:
254              return None
255          if attack_rate(period, max_buf, attack_headers)[0] >= attack_headers:
256              return None
257          # If it is sufficient, that's an upper bound to start our search.
258          succ_buf = min_bufsize - 1
259          fail_buf = max_buf
260  
261      # Then perform a bisection search to narrow it down.
262      while fail_buf > succ_buf + 1:
263          try_buf = (succ_buf + fail_buf) // 2
264          if attack_rate(period, try_buf, attack_headers)[0] >= attack_headers:
265              succ_buf = try_buf
266          else:
267              fail_buf = try_buf
268      return fail_buf
269  
270  
271  def optimize(when):
272      """Find the best (period, bufsize) configuration."""
273  
274      # When period*bufsize = memory_scale, the per-peer memory for a mainchain sync and a maximally
275      # long low-difficulty header sync are equal.
276      memory_scale = (find_max_headers(when) - MINCHAINWORK_HEADERS) / COMPACT_HEADER_SIZE
277      # Compute approximation for {bufsize/period}, using a formula for a simplified problem.
278      approx_ratio = lambert_w(log(4) * memory_scale / ATTACK_HEADERS**2) / log(4)
279      # Use those for a first attempt.
280      print("Searching configurations:")
281      period = int(sqrt(memory_scale / approx_ratio) + 0.5)
282      bufsize = find_bufsize(period, ATTACK_HEADERS, when)
283      mem = memory_usage(period, bufsize, when)
284      best = (period, bufsize, mem)
285      maps = [(period, bufsize), (MINCHAINWORK_HEADERS + 1, None)]
286      print(f"- Initial: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB")
287  
288      # Consider all period values between 1 and MINCHAINWORK_HEADERS, except the one just tried.
289      periods = [iv for iv in range(1, MINCHAINWORK_HEADERS + 1) if iv != period]
290      # Iterate, picking a random element from periods, computing its corresponding bufsize, and
291      # then using the result to shrink the period.
292      while True:
293          # Remove all periods whose memory usage for low-work long chain sync exceed the best
294          # memory usage we've found so far.
295          periods = [p for p in periods if find_max_headers(when) // p < best[2][0]]
296          # Stop if there is nothing left to try.
297          if len(periods) == 0:
298              break
299          # Pick a random remaining option for period size, and compute corresponding bufsize.
300          period = periods.pop(random.randrange(len(periods)))
301          # The buffer size (at a given attack level) cannot shrink as the period grows. Find the
302          # largest period smaller than the selected one we know the buffer size for, and use that
303          # as a lower bound to find_bufsize.
304          min_bufsize = max([(p, b) for p, b in maps if p < period] + [(0,0)])[1]
305          bufsize = find_bufsize(period, ATTACK_HEADERS, when, best[2][0], min_bufsize)
306          if bufsize is not None:
307              # We found a (period, bufsize) configuration with better memory usage than our best
308              # so far. Remember it for future lower bounds.
309              maps.append((period, bufsize))
310              mem = memory_usage(period, bufsize, when)
311              assert mem[0] <= best[2][0]
312              if ASSUME_CONVEX:
313                  # Remove all periods that are on the other side of the former best as the new
314                  # best.
315                  periods = [p for p in periods if (p < best[0]) == (period < best[0])]
316              best = (period, bufsize, mem)
317              print(f"- New best: period={period}, buffer={bufsize}, mem={mem[0] / 8192:.3f} KiB")
318          else:
319              # The (period, bufsize) configuration we found is worse than what we already had.
320              if ASSUME_CONVEX:
321                  # Remove all periods that are on the other side of the tried configuration as the
322                  # best one.
323                  periods = [p for p in periods if (p < period) == (best[0] < period)]
324  
325      # Return the result.
326      period, bufsize, _ = best
327      return period, bufsize
328  
329  
330  def analyze(when):
331      """Find the best configuration and print it out."""
332  
333      period, bufsize = optimize(when)
334      # Compute accurate statistics for the best found configuration.
335      _, mem_mainchain, mem_timewarp = memory_usage(period, bufsize, when)
336      headers_per_attack, _ = attack_rate(period, bufsize)
337      attack_volume = NET_HEADER_SIZE * MINCHAINWORK_HEADERS
338      # And report them.
339      print()
340      print(f"Given current min chainwork headers of {MINCHAINWORK_HEADERS}, the optimal parameters for low")
341      print(f"memory usage on mainchain for release until {TIME:%Y-%m-%d} is:")
342      print()
343      print(f"        // Generated by headerssync-params.py on {datetime.today():%Y-%m-%d}.")
344      print( "        m_headers_sync_params = HeadersSyncParams{")
345      print(f"            .commitment_period = {period},")
346      print(f"            .redownload_buffer_size = {bufsize},"
347            f" // {bufsize}/{period} = ~{bufsize/period:.1f} commitments")
348      print( "        };")
349      print()
350      print("Properties:")
351      print(f"- Per-peer memory for mainchain sync: {mem_mainchain / 8192:.3f} KiB")
352      print(f"- Per-peer memory for timewarp attack: {mem_timewarp / 8192:.3f} KiB")
353      print(f"- Attack rate: {1/headers_per_attack:.1f} attacks for 1 header of memory growth")
354      print(f"  (where each attack costs {attack_volume / 8388608:.3f} MiB bandwidth)")
355  
356  
357  analyze(TIME)