/ .ci / bench_bft_sync.sh
bench_bft_sync.sh
  1  #!/bin/bash
  2  
  3  #################################################################
  4  # Measures a validator syncing 1000 blocks from another validator
  5  #################################################################
  6  
  7  set -eo pipefail # error on any command failure
  8  
  9  network_id=1
 10  min_height=250
 11  
 12  # The total number of validators in the beacon committee.
 13  # This must match the number of validators used when generating the snapshot. 
 14  num_validators=40
 15  
 16  # The number of validators that are syncing
 17  num_nodes=1
 18  
 19  # Adjust this to show more/less log messages
 20  log_filter="info,snarkos_node_sync=trace,snarkos_node_bft::sync=trace,snarkos_node_bft::primary=warn,snarkos_node_rest=warn"
 21  
 22  max_wait=1800 # Wait for up to 30 minutes
 23  poll_interval=1 # Check block heights every second
 24  
 25  #shellcheck source=SCRIPTDIR/utils.sh
 26  . .ci/utils.sh
 27  
 28  branch_name=$(git rev-parse --abbrev-ref HEAD)
 29  echo "On branch: ${branch_name}"
 30  
 31  network_name=$(get_network_name $network_id)
 32  echo "Using network: $network_name (ID: $network_id)"
 33  
 34  snapshot_info=$(<info.txt)
 35  echo "Snapshot_info: ${snapshot_info}"
 36  
 37  # Create log directory
 38  log_dir=".logs-$(date +"%Y%m%d%H%M%S")"
 39  mkdir -p "$log_dir"
 40  
 41  # Define a trap handler that cleans up all processes on exit.
 42  # shellcheck disable=SC2329
 43  function exit_handler() {
 44    stop_nodes
 45  }
 46  trap exit_handler EXIT
 47  
 48  # Define a trap handler that prints a message when an error occurs 
 49  trap 'echo "â›”ī¸ Error in $BASH_SOURCE at line $LINENO: \"$BASH_COMMAND\" failed (exit $?)"' ERR
 50  
 51  # Shared flags betwen all nodes
 52  common_flags=(
 53    --nobanner --noupdater --nodisplay \
 54    "--network=$network_id"
 55    --nocdn
 56    "--dev-num-clients=0"
 57    "--dev-num-validators=$num_validators"
 58    --no-dev-txs
 59    "--log-filter=$log_filter"
 60  )
 61  
 62  # The validator that has the ledger to by synced from.
 63  $TASKSET1 snarkos start --dev 0 --validator "${common_flags[@]}" \
 64    --logfile="$log_dir/validator-0.log" &
 65  PIDS[0]=$!
 66  
 67  # Stores the list of all validators.
 68  validators="127.0.0.1:5000"
 69  
 70  # Spawn the clients that will sync the ledger
 71  for node_index in $(seq 1 "$num_nodes"); do
 72    name="validator-$node_index"
 73  
 74    # Ensure there are no old ledger files and the node syncs from scratch
 75    snarkos clean "--dev=$node_index" "--network=$network_id" || true
 76  
 77    $TASKSET2 snarkos start "--dev=$node_index" --validator \
 78      "${common_flags[@]}" "--validators=$validators" \
 79      "--logfile=$log_dir/$name.log" &
 80    PIDS[node_index]=$!
 81  
 82    # Add the validators BFT address to the validators list.
 83    bft_port=$((5000 + node_index))
 84    validators="$validators,127.0.0.1:$bft_port"
 85  
 86    # Add 1-second delay between starting nodes to avoid hitting rate limits
 87    sleep 1
 88  done
 89  
 90  # Block until nodes are running and connected to each other.
 91  wait_for_nodes $((num_nodes+1)) 0
 92  
 93  SECONDS=0
 94  
 95  # TODO add API call for number of connected validators.
 96  #for ((node_index = 0; node_index < num_nodes+1; node_index++)); do
 97  #  if ! (wait_for_peers "$node_index" $num_nodes); then
 98  #    exit 1
 99  #  fi
100  #done
101  
102  connect_time=$SECONDS
103  echo "â„šī¸ Nodes are fully connected (took $connect_time secs). Starting block sync measurement."
104  
# Poll the block heights until the target is reached or max_wait seconds have
# passed. On success the result is printed, appended to results.json, and the
# script exits with 0; on timeout the logs are printed and it exits with 1.

# The last block cannot be fully applied to the ledger yet as there is no next block to confirm it.
# However, we know that the sync height of a node is always at least one more than the ledger height.
# (The value does not change between polls, so it is computed once up front.)
expected_height=$((min_height - 1))

# Restart the timer so that it only covers the sync itself.
SECONDS=0
while (( SECONDS < max_wait )); do
  if check_heights 1 "$((num_nodes + 1))" "$expected_height" "$network_name" "$SECONDS"; then
    total_wait=$SECONDS
    throughput=$(compute_throughput "$min_height" "$total_wait")

    echo "🎉 BFT sync benchmark done! Waited $total_wait seconds for $min_height blocks. Throughput was $throughput blocks/s."

    # Append data to results file (and show it on stdout as well).
    printf '{ "name": "bft-sync", "unit": "blocks/s", "value": %.3f, "extra": "total_wait=%is, target_height=%i, connect_time=%i, branch=%s, %s" },\n' \
      "$throughput" "$total_wait" "$min_height" "$connect_time" "$branch_name" "$snapshot_info" \
      | tee -a results.json
    exit 0
  fi

  # Continue waiting
  sleep "$poll_interval"
done

# The reported limit is derived from max_wait (whole minutes, rounded down), so
# that the message stays correct when the timeout is changed.
echo "❌ Benchmark failed! Validators did not sync within $((max_wait / 60)) minutes."
print_client_logs "$log_dir" "$num_validators" "$num_nodes"

exit 1