#!/bin/bash
#
# bench_bft_sync.sh
#
#################################################################
# Measures a validator syncing `min_height` blocks from another validator
#################################################################
#
# All paths used by this script (.ci/utils.sh, info.txt, the log directory and
# results.json) are relative, so it has to be started from the directory that
# contains them.

# Abort on any command failure, including failures inside pipelines.
# NOTE: `-u` is intentionally not enabled here: the launch commands further
# down expand $TASKSET1 / $TASKSET2, which are never assigned in this file
# (presumably optional environment overrides -- confirm before tightening).
set -eo pipefail

network_id=1

# Number of blocks the syncing validator has to reach for the run to succeed.
min_height=250

# The total number of validators in the beacon committee.
# This must match the number of validators used when generating the snapshot.
num_validators=40

# The number of validators that are syncing
num_nodes=1

# Adjust this to show more/less log messages
log_filter="info,snarkos_node_sync=trace,snarkos_node_bft::sync=trace,snarkos_node_bft::primary=warn,snarkos_node_rest=warn"

max_wait=1800   # Wait for up to 30 minutes
poll_interval=1 # Check block heights every second

# Helper functions called by this script but not defined in it are expected to
# come from this file.
#shellcheck source=SCRIPTDIR/utils.sh
. .ci/utils.sh

branch_name=$(git rev-parse --abbrev-ref HEAD)
echo "On branch: ${branch_name}"

network_name=$(get_network_name "$network_id")
echo "Using network: $network_name (ID: $network_id)"

# Free-form description of the snapshot; it is echoed here and later appended
# verbatim to the "extra" field of the results entry.
snapshot_info=$(<info.txt)
echo "Snapshot_info: ${snapshot_info}"

# Create a timestamped log directory so repeated runs do not overwrite logs.
log_dir=".logs-$(date +"%Y%m%d%H%M%S")"
mkdir -p "$log_dir"

# Define a trap handler that cleans up all processes on exit.
# shellcheck disable=SC2329
exit_handler() {
  stop_nodes
}
trap exit_handler EXIT

# Define a trap handler that prints a message when an error occurs.
# Single quotes are required so the variables are expanded when the trap
# fires, not when it is installed.
trap 'echo "⛔️ Error in $BASH_SOURCE at line $LINENO: \"$BASH_COMMAND\" failed (exit $?)"' ERR

# Shared flags between all nodes
common_flags=(
  --nobanner
  --noupdater
  --nodisplay
  "--network=$network_id"
  --nocdn
  "--dev-num-clients=0"
  "--dev-num-validators=$num_validators"
  --no-dev-txs
  "--log-filter=$log_filter"
)

# The validator that has the ledger to be synced from.
# Start validator 0 in the background and record its PID.
# $TASKSET1 / $TASKSET2 are left unquoted; presumably they hold an optional
# command prefix (e.g. a CPU-pinning wrapper) and may be empty -- confirm.
$TASKSET1 snarkos start --dev 0 --validator "${common_flags[@]}" \
  --logfile="$log_dir/validator-0.log" &
PIDS[0]=$!

# Comma-separated list of the BFT addresses of all validators started so far.
validators="127.0.0.1:5000"

# Spawn the validators that will sync the ledger.
for ((node_index = 1; node_index <= num_nodes; node_index++)); do
  name="validator-$node_index"

  # Ensure there are no old ledger files and the node syncs from scratch.
  # A failing clean is tolerated on purpose (best effort).
  snarkos clean "--dev=$node_index" "--network=$network_id" || true

  $TASKSET2 snarkos start "--dev=$node_index" --validator \
    "${common_flags[@]}" "--validators=$validators" \
    "--logfile=$log_dir/$name.log" &
  PIDS[node_index]=$!

  # Add this validator's BFT address to the list, so that it is passed to the
  # nodes started after it.
  bft_port=$((5000 + node_index))
  validators="$validators,127.0.0.1:$bft_port"

  # Add 1-second delay between starting nodes to avoid hitting rate limits
  sleep 1
done

# Block until nodes are running and connected to each other.
wait_for_nodes $((num_nodes + 1)) 0

SECONDS=0

# TODO add API call for number of connected validators.
#for ((node_index = 0; node_index < num_nodes+1; node_index++)); do
#  if ! (wait_for_peers "$node_index" $num_nodes); then
#    exit 1
#  fi
#done

# NOTE(review): while the peer wait above stays commented out, nothing runs
# between resetting SECONDS and reading it, so connect_time is effectively 0.
connect_time=$SECONDS
echo "ℹ️ Nodes are fully connected (took $connect_time secs). Starting block sync measurement."

# The last block cannot be fully applied to the ledger yet as there is no next block to confirm it.
# However, we know that the sync height of a node is always at least one more than the ledger height.
# The value does not change between polls, so it is computed once up front.
expected_height=$((min_height - 1))

# Check heights periodically with a timeout. SECONDS is reset so that it
# measures only the sync phase.
SECONDS=0
while ((SECONDS < max_wait)); do
  if check_heights 1 $((num_nodes + 1)) "$expected_height" "$network_name" "$SECONDS"; then
    total_wait=$SECONDS
    throughput=$(compute_throughput "$min_height" "$total_wait")

    echo "🎉 BFT sync benchmark done! Waited $total_wait seconds for $min_height blocks. Throughput was $throughput blocks/s."

    # Append data to results file (and show it on stdout via tee).
    # NOTE(review): snapshot_info and branch_name are inserted unescaped into
    # a JSON string -- assumes they contain no quotes or newlines.
    printf "{ \"name\": \"bft-sync\", \"unit\": \"blocks/s\", \"value\": %.3f, \"extra\": \"total_wait=%is, target_height=%i, connect_time=%i, branch=%s, %s\" },\n" \
      "$throughput" "$total_wait" "$min_height" "$connect_time" "$branch_name" "$snapshot_info" \
      | tee -a results.json
    exit 0
  fi

  # Continue waiting
  sleep "$poll_interval"
done

# The timeout is reported in minutes and derived from max_wait so the message
# stays correct when the limit is changed.
echo "❌ Benchmark failed! Validators did not sync within $((max_wait / 60)) minutes."
print_client_logs "$log_dir" "$num_validators" "$num_nodes"

exit 1