/ testnet / scripts / reset-deploy / test-single-server.sh
test-single-server.sh
  1  #!/usr/bin/env bash
  2  # =============================================================================
  3  # Single Server Test - Validate deployment workflow on one server
  4  # =============================================================================
  5  # Tests all deployment phases on a single server (testnet001) before
  6  # running full 6-server deployment. Validates:
  7  # - SSH connectivity and permissions
  8  # - Cleanup operations
  9  # - Build process
 10  # - Key generation
 11  # - Service deployment
 12  # - Monitoring capabilities
 13  #
 14  # Usage:
 15  #   ./test-single-server.sh [options]
 16  #
 17  # Options:
 18  #   --server N          Server number to test (default: 1)
 19  #   --target-height N   Target block height (default: 10)
 20  #   --skip-build        Skip build phase
 21  #   --skip-cleanup      Skip cleanup phase
 22  #   --help              Show help
 23  
 24  set -euo pipefail
 25  
 26  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 27  source "${SCRIPT_DIR}/lib/common.sh"
 28  source "${SCRIPT_DIR}/lib/servers.sh"
 29  source "${SCRIPT_DIR}/lib/ssh.sh"
 30  
 31  # Test configuration
 32  TEST_SERVER="${TEST_SERVER:-1}"
 33  TARGET_HEIGHT="${TARGET_HEIGHT:-10}"
 34  SKIP_BUILD=false
 35  SKIP_CLEANUP=false
 36  export DRY_RUN=false
 37  
 38  ADNET_DIR="${ADNET_DIR:-/home/marco/working-repos/adnet}"
 39  BINARY_PATH="${ADNET_DIR}/target/release/adnet"
 40  TEST_DIR="/tmp/adnet-single-test-$(date +%Y%m%d-%H%M%S)"
 41  
 42  # Parse arguments
 43  show_help() {
 44      cat << EOF
 45  Single Server Test - Validate Deployment Workflow
 46  
 47  Usage: ${0##*/} [OPTIONS]
 48  
 49  Tests the complete deployment workflow on a single server before running
 50  the full 6-server deployment. This validates all operations are working
 51  correctly without risking the entire testnet.
 52  
 53  Options:
 54    --server N          Server number to test (1-6, default: 1)
 55    --target-height N   Block height to monitor (default: 10)
 56    --skip-build        Skip build phase (use existing binary)
 57    --skip-cleanup      Skip cleanup phase
 58    --help              Show this help
 59  
 60  Examples:
 61    # Test on server 1 with 10 blocks
 62    ${0##*/}
 63  
 64    # Test on server 2 with 20 blocks
 65    ${0##*/} --server 2 --target-height 20
 66  
 67    # Quick test without rebuild
 68    ${0##*/} --skip-build
 69  
 70  EOF
 71  }
 72  
 73  while [[ $# -gt 0 ]]; do
 74      case $1 in
 75          --server)
 76              TEST_SERVER="$2"
 77              shift 2
 78              ;;
 79          --target-height)
 80              TARGET_HEIGHT="$2"
 81              shift 2
 82              ;;
 83          --skip-build)
 84              SKIP_BUILD=true
 85              shift
 86              ;;
 87          --skip-cleanup)
 88              SKIP_CLEANUP=true
 89              shift
 90              ;;
 91          --help)
 92              show_help
 93              exit 0
 94              ;;
 95          *)
 96              echo "Unknown option: $1"
 97              echo "Use --help for usage"
 98              exit 1
 99              ;;
100      esac
101  done
102  
# Validate the effective test parameters before any work starts.
#   TEST_SERVER   must be a single digit 1-6.
#   TARGET_HEIGHT must be a positive decimal integer without leading zeros:
#                 it is used as a divisor when computing progress, and bash
#                 arithmetic would read a leading zero as octal.
# Calls die (from the sourced helper library) on the first invalid value.
validate_test_params() {
    if [[ ! "${TEST_SERVER}" =~ ^[1-6]$ ]]; then
        die "Invalid server number: ${TEST_SERVER}. Must be 1-6."
    fi

    if [[ ! "${TARGET_HEIGHT}" =~ ^[1-9][0-9]*$ ]]; then
        die "Invalid target height: ${TARGET_HEIGHT}. Must be a positive integer."
    fi
}

validate_test_params
107  
# Print the boxed title banner followed by one blank line.
show_banner() {
    local -a banner_rows=(
        '╔═══════════════════════════════════════════════════════════════════╗'
        '║                                                                   ║'
        '║    Single Server Test - Deployment Validation                    ║'
        '║                                                                   ║'
        '╚═══════════════════════════════════════════════════════════════════╝'
    )

    # The trailing empty argument produces the blank line after the box.
    printf '%s\n' "${banner_rows[@]}" ""
}
118  
119  show_test_plan() {
120      local host
121      host=$(get_server_host "${TEST_SERVER}")
122      local role
123      role=$(get_server_role "${TEST_SERVER}")
124  
125      log_phase "TEST CONFIGURATION"
126  
127      cat << EOF
128  Test Server:
129    Server ID:    ${TEST_SERVER}
130    Hostname:     ${host}
131    Role:         ${role}
132  
133  Test Parameters:
134    Target Height: ${TARGET_HEIGHT} blocks (~$((TARGET_HEIGHT * 10 / 60)) min)
135    Skip Build:    ${SKIP_BUILD}
136    Skip Cleanup:  ${SKIP_CLEANUP}
137  
138  Test Phases:
139  EOF
140  
141      [[ "${SKIP_CLEANUP}" == "false" ]] && echo "  ✓ 1. Cleanup server" || echo "  ✗ 1. Cleanup (skipped)"
142      [[ "${SKIP_BUILD}" == "false" ]] && echo "  ✓ 2. Build binary" || echo "  ✗ 2. Build (skipped)"
143      echo "  ✓ 3. Generate test keys"
144      echo "  ✓ 4. Deploy configuration"
145      echo "  ✓ 5. Deploy binary"
146      echo "  ✓ 6. Start service"
147      echo "  ✓ 7. Monitor blocks (${TARGET_HEIGHT} blocks)"
148      echo "  ✓ 8. Verify operation"
149      echo "  ✓ 9. Cleanup test artifacts"
150      echo ""
151  }
152  
# Test Phase 1: Cleanup
#
# Stops and disables the adnet service on the test server and removes the unit
# files, binary, data, config and runtime directories left by earlier runs.
# Does nothing when SKIP_CLEANUP is "true".
#
# Returns: 0 on success or skip; 1 as soon as a remote command fails.
#
# Failures are checked explicitly: main invokes this function as
# `test_cleanup || die ...`, and bash ignores `set -e` inside a function called
# that way, so an unchecked failure would still be reported as success.
test_cleanup() {
    if [[ "${SKIP_CLEANUP}" == "true" ]]; then
        log_warn "Skipping cleanup (--skip-cleanup)"
        return 0
    fi

    log_phase "TEST PHASE 1: Cleanup Server"

    local host
    host=$(get_server_host "${TEST_SERVER}")

    log_info "Cleaning server ${TEST_SERVER} (${host})..."

    local remote_cmd

    # Stop services. The remote commands tolerate a missing unit (`|| true`),
    # so a non-zero status here means the remote call itself failed.
    for remote_cmd in \
        "systemctl stop adnet 2>/dev/null || true" \
        "systemctl disable adnet 2>/dev/null || true"; do
        if ! ssh_sudo "${TEST_SERVER}" "${remote_cmd}"; then
            log_error "Remote command failed on ${host}: ${remote_cmd}"
            return 1
        fi
    done
    log_success "Services stopped"

    # Remove files, then reload systemd so the deleted units are forgotten.
    for remote_cmd in \
        "rm -f /etc/systemd/system/adnet*.service" \
        "rm -f /usr/local/bin/adnet" \
        "rm -rf /var/lib/adnet" \
        "rm -rf /etc/adnet" \
        "rm -rf /var/run/adnet" \
        "systemctl daemon-reload"; do
        if ! ssh_sudo "${TEST_SERVER}" "${remote_cmd}"; then
            log_error "Remote command failed on ${host}: ${remote_cmd}"
            return 1
        fi
    done
    log_success "Artifacts removed"

    log_success "Cleanup complete"
}
183  
# Test Phase 2: Build
#
# Builds the adnet release binary with cargo inside ADNET_DIR, copying the
# build output to ${TEST_DIR}/build.log. With SKIP_BUILD="true" it only checks
# that BINARY_PATH already exists.
#
# Returns: 0 on success; 1 when the source directory cannot be entered or the
#          build fails. Calls die when the binary is missing.
#
# Note: the working directory of the script is changed to ADNET_DIR.
test_build() {
    if [[ "${SKIP_BUILD}" == "true" ]]; then
        log_warn "Skipping build (--skip-build)"

        if [[ ! -f "${BINARY_PATH}" ]]; then
            die "Binary not found: ${BINARY_PATH}"
        fi

        log_info "Using existing binary: ${BINARY_PATH}"
        return 0
    fi

    log_phase "TEST PHASE 2: Build Binary"

    # Checked explicitly: main calls this as `test_build || die ...`, which
    # disables `set -e` in here, so an unchecked failed cd would let cargo run
    # in whatever directory the script happened to be in.
    if ! cd "${ADNET_DIR}"; then
        log_error "Cannot enter source directory: ${ADNET_DIR}"
        return 1
    fi

    log_info "Building adnet..."
    log_warn "This may take 15-30 min on first build..."

    # The pipeline status reflects cargo (not just tee) because the script
    # enables `pipefail`.
    if cargo build --release --bin adnet 2>&1 | tee "${TEST_DIR}/build.log"; then
        log_success "Build succeeded"
    else
        log_error "Build failed. Check ${TEST_DIR}/build.log"
        return 1
    fi

    # Verify binary
    if [[ ! -f "${BINARY_PATH}" ]]; then
        die "Binary not found after build: ${BINARY_PATH}"
    fi

    local version
    version=$("${BINARY_PATH}" --version 2>/dev/null || echo "UNKNOWN")
    log_success "Binary ready: ${version}"
}
220  
# Test Phase 3: Generate Keys
#
# Runs `${BINARY_PATH} alpha account new`, extracts the private key, view key
# and address from its output (last field of the first line containing each
# label) and writes them to ${TEST_DIR}/keys/account.json. Also writes a
# placeholder network.json. Both files end up with mode 600.
#
# Returns: 0 on success; 1 when the command fails, when any of the three
#          values cannot be parsed, or when the files cannot be written.
test_generate_keys() {
    log_phase "TEST PHASE 3: Generate Test Keys"

    local keys_dir="${TEST_DIR}/keys"
    mkdir -p "${keys_dir}" || return 1

    log_info "Generating validator account key..."

    # Generate account and capture output. Testing the assignment directly
    # keeps the failure branch reachable whether or not `set -e` is in effect.
    local account_output
    if ! account_output=$("${BINARY_PATH}" alpha account new 2>&1); then
        log_error "Failed to generate account key"
        return 1
    fi

    local private_key view_key address
    private_key=$(awk '/Private Key:/ { print $NF; exit }' <<< "${account_output}")
    view_key=$(awk '/View Key:/ { print $NF; exit }' <<< "${account_output}")
    address=$(awk '/Address:/ { print $NF; exit }' <<< "${account_output}")

    # Refuse to write a key file with blanks in it: a successful exit code
    # with unexpected output would otherwise pass as a generated key.
    if [[ -z "${private_key}" || -z "${view_key}" || -z "${address}" ]]; then
        log_error "Failed to parse account key output"
        return 1
    fi

    # Write both files under a restrictive umask (in a subshell so the
    # script's own umask is untouched): the private key is never on disk with
    # wider permissions than 600.
    (
        umask 077

        cat > "${keys_dir}/account.json" << EOF
{
    "type": "alpha_account",
    "private_key": "${private_key}",
    "view_key": "${view_key}",
    "address": "${address}",
    "created_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
}
EOF

        # Create network key placeholder
        cat > "${keys_dir}/network.json" << EOF
{
    "type": "ed25519",
    "note": "Generated on first adnet startup"
}
EOF
    ) || {
        log_error "Failed to write key files to ${keys_dir}"
        return 1
    }

    log_success "Account key generated"
    log_info "Address: ${address}"

    # Also covers files left over from an earlier run with looser modes.
    chmod 600 "${keys_dir}"/*.json || return 1
    log_success "Keys generated successfully"
}
271  
# Test Phase 4: Deploy Config
#
# Copies the testnet config template into TEST_DIR, annotates it, creates the
# adnet directories on the test server, installs the config as
# /etc/adnet/config.toml, installs the generated key files into
# /var/lib/adnet/keys and hands /var/lib/adnet to SSH_USER.
#
# Returns: 0 on success; 1 as soon as any local or remote step fails. Each
#          step is checked because main calls this as
#          `test_deploy_config || die ...`, which disables `set -e` in here.
test_deploy_config() {
    log_phase "TEST PHASE 4: Deploy Configuration"

    local host
    host=$(get_server_host "${TEST_SERVER}")

    # Create test config
    local template="${ADNET_DIR}/testnet/config/adnet-testnet.toml"
    local test_config="${TEST_DIR}/adnet-test.toml"
    if ! cp "${template}" "${test_config}"; then
        log_error "Cannot copy config template: ${template}"
        return 1
    fi

    # Modify for single-node testing. This only inserts a comment line after
    # the opening of bootstrap_peers; entries already in the list are kept.
    # NOTE(review): `\n` in the replacement and bare `-i` are GNU sed features.
    sed -i 's/bootstrap_peers = \[/bootstrap_peers = [\n    # Single node test - no bootstrap peers/' "${test_config}" || return 1

    log_info "Creating directories on ${host}..."
    # Paths are spelled out instead of using {a,b,c}: brace expansion would
    # only work if the remote command is interpreted by a shell supporting it.
    ssh_sudo "${TEST_SERVER}" "mkdir -p /var/lib/adnet/data /var/lib/adnet/keys /var/lib/adnet/genesis" || return 1
    ssh_sudo "${TEST_SERVER}" "mkdir -p /etc/adnet" || return 1
    ssh_sudo "${TEST_SERVER}" "mkdir -p /var/run/adnet" || return 1
    log_success "Directories created"

    log_info "Deploying configuration..."
    scp_to "${TEST_SERVER}" "${test_config}" "/tmp/config.toml" || return 1
    ssh_sudo "${TEST_SERVER}" "mv /tmp/config.toml /etc/adnet/config.toml" || return 1
    ssh_sudo "${TEST_SERVER}" "chmod 644 /etc/adnet/config.toml" || return 1
    log_success "Configuration deployed"

    log_info "Deploying keys..."
    # Stage and move each key file by name. A remote `mv /tmp/*.json` would
    # also pick up any unrelated JSON file that happens to sit in /tmp.
    local key_file key_name
    for key_file in "${TEST_DIR}/keys/"*.json; do
        if [[ ! -e "${key_file}" ]]; then
            log_error "No key files found in ${TEST_DIR}/keys"
            return 1
        fi
        key_name="${key_file##*/}"
        scp_to "${TEST_SERVER}" "${key_file}" "/tmp/${key_name}" || return 1
        ssh_sudo "${TEST_SERVER}" "mv /tmp/${key_name} /var/lib/adnet/keys/${key_name}" || return 1
        ssh_sudo "${TEST_SERVER}" "chmod 600 /var/lib/adnet/keys/${key_name}" || return 1
    done
    log_success "Keys deployed"

    # Set ownership
    ssh_sudo "${TEST_SERVER}" "chown -R ${SSH_USER}:${SSH_USER} /var/lib/adnet" || return 1
    log_success "Configuration deployment complete"
}
309  
# Test Phase 5: Deploy Binary
#
# Copies BINARY_PATH to /tmp/adnet on the test server, installs it as
# /usr/local/bin/adnet with mode 755, removes the staged copy and logs the
# version reported by the installed binary ("UNKNOWN" if the query fails).
#
# Returns: 0 on success; 1 when the copy or the install fails. Both are
#          checked explicitly because main calls this as
#          `test_deploy_binary || die ...`, which disables `set -e` in here.
test_deploy_binary() {
    log_phase "TEST PHASE 5: Deploy Binary"

    local host
    host=$(get_server_host "${TEST_SERVER}")

    log_info "Copying binary to ${host}..."
    if ! scp_to "${TEST_SERVER}" "${BINARY_PATH}" "/tmp/adnet"; then
        log_error "Failed to copy binary to ${host}"
        return 1
    fi

    log_info "Installing binary..."
    if ! ssh_sudo "${TEST_SERVER}" "install -m 755 /tmp/adnet /usr/local/bin/adnet"; then
        log_error "Failed to install binary on ${host}"
        return 1
    fi

    # A leftover staged copy does not affect the deployment, so only warn.
    if ! ssh_sudo "${TEST_SERVER}" "rm /tmp/adnet"; then
        log_warn "Could not remove staged binary /tmp/adnet on ${host}"
    fi

    # Verify
    local version
    version=$(ssh_exec "${TEST_SERVER}" "/usr/local/bin/adnet --version" || echo "UNKNOWN")
    log_success "Binary deployed: ${version}"
}
329  
# Test Phase 6: Start Service
#
# Writes a systemd unit for adnet to ${TEST_DIR}/adnet.service, installs it on
# the test server, enables and starts the service, then confirms that
# `systemctl is-active adnet` reports exactly "active".
#
# The unit runs as SSH_USER (falling back to "devops" when SSH_USER is unset
# or empty), the same account test_deploy_config makes the owner of
# /var/lib/adnet.
#
# Returns: 0 when the service is active; 1 when installing or starting the
#          unit fails or the service is not active afterwards. Steps are
#          checked explicitly because main calls this as
#          `test_start_service || die ...`, which disables `set -e` in here.
test_start_service() {
    log_phase "TEST PHASE 6: Start Service"

    local service_user="${SSH_USER:-devops}"

    # Create systemd service
    log_info "Creating systemd service..."
    local service_file="${TEST_DIR}/adnet.service"
    cat > "${service_file}" << EOF || return 1
[Unit]
Description=ADNet Unified Node (Test)
After=network.target

[Service]
Type=simple
User=${service_user}
WorkingDirectory=/var/lib/adnet
ExecStart=/usr/local/bin/adnet start --config /etc/adnet/config.toml
Restart=always
RestartSec=10
StandardOutput=journal
StandardError=journal
SyslogIdentifier=adnet

LimitNOFILE=65536
LimitNPROC=4096

NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/adnet /var/run/adnet

[Install]
WantedBy=multi-user.target
EOF

    scp_to "${TEST_SERVER}" "${service_file}" "/tmp/adnet.service" || return 1
    ssh_sudo "${TEST_SERVER}" "mv /tmp/adnet.service /etc/systemd/system/adnet.service" || return 1
    ssh_sudo "${TEST_SERVER}" "chmod 644 /etc/systemd/system/adnet.service" || return 1
    ssh_sudo "${TEST_SERVER}" "systemctl daemon-reload" || return 1
    ssh_sudo "${TEST_SERVER}" "systemctl enable adnet" || return 1
    log_success "Service configured"

    log_info "Starting adnet service..."
    local service_state=""
    if ssh_sudo "${TEST_SERVER}" "systemctl start adnet"; then
        # Wait for startup
        sleep 3

        # is-active exits non-zero for every state but "active"; only the
        # printed state matters here, so the exit status is ignored.
        service_state=$(ssh_sudo "${TEST_SERVER}" "systemctl is-active adnet" || true)
        # Drop carriage returns in case the remote command ran on a terminal.
        service_state="${service_state//$'\r'/}"
    fi

    # Match a whole line: a substring search for "active" would also accept
    # "inactive" and "activating".
    if grep -qxF -e "active" <<< "${service_state}"; then
        log_success "Service started successfully"
    else
        log_error "Service failed to start"
        log_error "Recent logs:"
        ssh_sudo "${TEST_SERVER}" "journalctl -u adnet -n 30 --no-pager" || true
        return 1
    fi

    # Wait a bit more for initialization
    log_info "Waiting for node initialization..."
    sleep 5
}
396  
# Test Phase 7: Monitor Blocks
#
# Polls `adnet status --format json` on the test server and reads
# .alpha.block_height until it reaches TARGET_HEIGHT, printing a one-line
# progress display on each poll.
#
# Optional environment overrides (defaults in parentheses):
#   MONITOR_POLL_INTERVAL       seconds between polls (2)
#   MONITOR_MAX_STUCK_CHECKS    consecutive polls at the same non-zero height
#                               before giving up (20)
#   MONITOR_MAX_STARTUP_CHECKS  consecutive polls reporting height 0 before
#                               giving up (150)
#
# Returns: 0 once the target height is reached; 1 when TARGET_HEIGHT is not a
#          positive integer, when the height stops advancing, or when no block
#          is ever reported.
test_monitor_blocks() {
    log_phase "TEST PHASE 7: Monitor Block Production"

    local poll_interval="${MONITOR_POLL_INTERVAL:-2}"
    local max_stuck_checks="${MONITOR_MAX_STUCK_CHECKS:-20}"
    local max_startup_checks="${MONITOR_MAX_STARTUP_CHECKS:-150}"

    # TARGET_HEIGHT is a divisor below, and a leading zero would be read as
    # octal by bash arithmetic.
    if [[ ! "${TARGET_HEIGHT}" =~ ^[1-9][0-9]*$ ]]; then
        log_error "Invalid target height: ${TARGET_HEIGHT} (must be a positive integer)"
        return 1
    fi

    log_info "Monitoring until block height ${TARGET_HEIGHT}..."
    log_info "Polling every ${poll_interval} seconds"
    echo ""

    local start_time
    start_time=$(date +%s)
    local last_height=0
    local stuck_count=0
    local zero_count=0
    local height progress end_time duration avg_block_time

    while true; do
        # Query block height; any failure of the query or of jq counts as 0.
        height=$(ssh_exec "${TEST_SERVER}" "adnet status --format json 2>/dev/null" | \
            jq -r '.alpha.block_height // 0' 2>/dev/null || echo "0")

        # Validate it's a number, and force base 10 so "08" is not an error.
        if [[ "${height}" =~ ^[0-9]+$ ]]; then
            height=$((10#${height}))
        else
            height=0
        fi

        # Calculate progress
        progress=$((height * 100 / TARGET_HEIGHT))
        if [[ ${progress} -gt 100 ]]; then
            progress=100
        fi

        # Track lack of progress separately for "no block yet" and "stalled
        # at a non-zero height", so a node that never produces a block cannot
        # keep this loop running forever.
        if [[ ${height} -eq 0 ]]; then
            zero_count=$((zero_count + 1))
            stuck_count=0
        elif [[ ${height} -eq ${last_height} ]]; then
            zero_count=0
            stuck_count=$((stuck_count + 1))
        else
            zero_count=0
            stuck_count=0
        fi
        last_height=${height}

        # Display status
        echo -ne "\rBlock: ${CYAN}${height}${NC}/${TARGET_HEIGHT} (${progress}%) "

        if [[ ${stuck_count} -ge 3 ]]; then
            echo -ne "${YELLOW}⚠ No progress (${stuck_count} checks)${NC}   "
        elif [[ ${height} -gt 0 ]]; then
            echo -ne "${GREEN}✓ Producing blocks${NC}   "
        else
            echo -ne "${YELLOW}⏳ Starting...${NC}   "
        fi

        # Check if target reached
        if [[ ${height} -ge ${TARGET_HEIGHT} ]]; then
            echo ""
            log_success "Target height ${TARGET_HEIGHT} reached!"

            end_time=$(date +%s)
            duration=$((end_time - start_time))
            # height >= TARGET_HEIGHT >= 1 here, so the division is safe.
            avg_block_time=$((duration / height))

            log_info "Duration: ${duration}s"
            log_info "Average block time: ${avg_block_time}s"
            break
        fi

        # Timeout if stuck for too long
        if [[ ${stuck_count} -ge ${max_stuck_checks} ]]; then
            echo ""
            log_error "Node stuck at height ${height} for ${stuck_count} checks"
            log_error "Recent logs:"
            ssh_sudo "${TEST_SERVER}" "journalctl -u adnet -n 30 --no-pager" || true
            return 1
        fi

        # Timeout if the node never reports a block
        if [[ ${zero_count} -ge ${max_startup_checks} ]]; then
            echo ""
            log_error "No blocks reported after ${zero_count} checks"
            log_error "Recent logs:"
            ssh_sudo "${TEST_SERVER}" "journalctl -u adnet -n 30 --no-pager" || true
            return 1
        fi

        sleep "${poll_interval}"
    done
}
478  
# Test Phase 8: Verify Operation
#
# Confirms the adnet service is active, logs block height, sync state and
# (when present) the first 16 characters of the peer id from
# `adnet status --format json`, and reports how many journal lines of the last
# five minutes contain "error" (case-insensitive).
#
# Returns: 0 when the service is active (status and log findings are only
#          logged); 1 when the service is not active.
test_verify_operation() {
    log_phase "TEST PHASE 8: Verify Operation"

    # Check service status
    log_info "Checking service status..."
    local service_state
    # is-active exits non-zero for every state but "active"; only the printed
    # state is evaluated.
    service_state=$(ssh_sudo "${TEST_SERVER}" "systemctl is-active adnet" || true)
    service_state="${service_state//$'\r'/}"
    # Whole-line match: a substring search for "active" also accepts "inactive".
    if grep -qxF -e "active" <<< "${service_state}"; then
        log_success "Service is active"
    else
        log_error "Service is not active"
        return 1
    fi

    # Query full status
    log_info "Querying node status..."
    local status
    status=$(ssh_exec "${TEST_SERVER}" "adnet status --format json 2>/dev/null" || echo "{}")

    if [[ -n "${status}" && "${status}" != "{}" ]]; then
        log_success "Status query successful"

        # Extract key metrics
        local height
        height=$(jq -r '.alpha.block_height // 0' <<< "${status}")
        local syncing
        # jq's `//` falls back on false as well as null, so `.x // true` can
        # never yield false; default only when the field is actually absent.
        syncing=$(jq -r 'if .alpha.syncing == null then true else .alpha.syncing end' <<< "${status}")

        log_info "Block height: ${height}"
        log_info "Syncing: ${syncing}"

        # Check if network info available
        local peer_id
        peer_id=$(jq -r '.network.peer_id // empty' <<< "${status}" 2>/dev/null || echo "")
        if [[ -n "${peer_id}" ]]; then
            log_info "Peer ID: ${peer_id:0:16}..."
        fi
    else
        log_warn "Status query returned empty result"
    fi

    # Check logs for errors
    log_info "Checking for errors in logs..."
    local error_count
    error_count=$(ssh_sudo "${TEST_SERVER}" "journalctl -u adnet --since '5 minutes ago' | grep -i error | wc -l" || echo "0")
    # wc may pad its output; anything that is still not a plain number after
    # stripping whitespace is reported rather than fed to an arithmetic test.
    error_count="${error_count//[[:space:]]/}"

    if [[ ! "${error_count}" =~ ^[0-9]+$ ]]; then
        log_warn "Could not determine error count from logs"
    elif [[ $((10#${error_count})) -eq 0 ]]; then
        log_success "No errors in recent logs"
    else
        log_warn "${error_count} errors found in logs (may be normal)"
    fi

    log_success "Verification complete"
}
535  
# Test Phase 9: Cleanup
#
# After confirmation, stops and disables the adnet service on the test server,
# removes everything the test deployed there (unit file, binary, data, config
# and runtime directories) and deletes the local TEST_DIR. When the user
# declines, the node is left running and a manual stop command is printed.
#
# Returns: 0 when cleanup is declined or completes.
test_cleanup_artifacts() {
    log_phase "TEST PHASE 9: Cleanup Test Artifacts"

    if ! confirm "Stop the test node and cleanup?" "y"; then
        log_warn "Skipping cleanup - test node still running"
        log_info "To cleanup manually later:"
        log_info "  ssh ${SSH_USER}@$(get_server_host "${TEST_SERVER}") sudo systemctl stop adnet"
        return 0
    fi

    log_info "Stopping service..."
    ssh_sudo "${TEST_SERVER}" "systemctl stop adnet"
    ssh_sudo "${TEST_SERVER}" "systemctl disable adnet"
    log_success "Service stopped"

    log_info "Removing test deployment..."
    ssh_sudo "${TEST_SERVER}" "rm -f /etc/systemd/system/adnet.service"
    ssh_sudo "${TEST_SERVER}" "rm -f /usr/local/bin/adnet"
    ssh_sudo "${TEST_SERVER}" "rm -rf /var/lib/adnet"
    ssh_sudo "${TEST_SERVER}" "rm -rf /etc/adnet"
    # test_deploy_config creates this directory too, so remove it here as the
    # phase 1 cleanup does.
    ssh_sudo "${TEST_SERVER}" "rm -rf /var/run/adnet"
    ssh_sudo "${TEST_SERVER}" "systemctl daemon-reload"
    log_success "Test deployment removed"

    log_info "Cleaning local test directory..."
    # ${VAR:?} aborts instead of running `rm -rf ""` if TEST_DIR is ever empty.
    rm -rf -- "${TEST_DIR:?}"
    log_success "Local artifacts cleaned"

    log_success "Cleanup complete"
}
566  
# Show test summary
#
# Prints the tested server, target height and test directory, the list of
# phases (phases 1 and 2 are shown as skipped when SKIP_CLEANUP / SKIP_BUILD
# is "true"), the overall result and the suggested next steps.
# NOTE(review): phase 9 is always listed as completed; whether the user
# declined the final cleanup is not recorded anywhere this function can read.
show_summary() {
    log_phase "TEST SUMMARY"

    local cleanup_line="  ✓ 1. Cleanup server"
    if [[ "${SKIP_CLEANUP}" == "true" ]]; then
        cleanup_line="  ✗ 1. Cleanup (skipped)"
    fi

    local build_line="  ✓ 2. Build binary"
    if [[ "${SKIP_BUILD}" == "true" ]]; then
        build_line="  ✗ 2. Build (skipped)"
    fi

    cat << EOF
Test Configuration:
  Server:        $(get_server_host "${TEST_SERVER}")
  Target Height: ${TARGET_HEIGHT} blocks
  Test Dir:      ${TEST_DIR}

Phases Completed:
${cleanup_line}
${build_line}
  ✓ 3. Generate keys
  ✓ 4. Deploy configuration
  ✓ 5. Deploy binary
  ✓ 6. Start service
  ✓ 7. Monitor blocks
  ✓ 8. Verify operation
  ✓ 9. Cleanup artifacts

EOF

    # %b interprets backslash escapes, so the colours render whether the
    # variables hold raw escape bytes or text such as '\033[0;32m'; cat would
    # print the latter literally.
    printf 'Result: %b%s%b\n' "${GREEN}" "ALL TESTS PASSED" "${NC}"

    cat << EOF

The deployment workflow is validated and ready for full 6-server deployment.

Next Steps:
  1. Review test logs if needed
  2. Run full deployment:
     ./testnet-reset-deploy.sh --dry-run    # Preview first
     ./testnet-reset-deploy.sh              # Full deployment

EOF
}
600  
# Main test execution
#
# Shows the banner and plan, asks for confirmation (exits 1 if declined),
# creates TEST_DIR, checks SSH connectivity, then runs phases 1-8 in order,
# calling die with a phase-specific message when one fails. The final cleanup
# phase may fail or be declined without failing the run. Ends with the summary.
main() {
    show_banner
    show_test_plan

    confirm "Ready to run single-server test?" "y" || {
        log_error "Test cancelled"
        exit 1
    }

    echo ""

    # Create test directory
    mkdir -p "${TEST_DIR}"
    log_info "Test directory: ${TEST_DIR}"
    echo ""

    # Verify connectivity
    log_info "Testing SSH connectivity..."
    test_ssh_connection "${TEST_SERVER}" || die "Cannot connect to server ${TEST_SERVER}"
    log_success "SSH connection verified"
    echo ""

    # Phase table: "<function>|<message passed to die on failure>".
    local -a phase_table=(
        "test_cleanup|Cleanup phase failed"
        "test_build|Build phase failed"
        "test_generate_keys|Key generation failed"
        "test_deploy_config|Config deployment failed"
        "test_deploy_binary|Binary deployment failed"
        "test_start_service|Service start failed"
        "test_monitor_blocks|Block monitoring failed"
        "test_verify_operation|Verification failed"
    )

    local phase_entry
    for phase_entry in "${phase_table[@]}"; do
        "${phase_entry%%|*}" || die "${phase_entry#*|}"
        echo ""
    done

    test_cleanup_artifacts || true  # Don't fail if user skips cleanup
    echo ""

    show_summary
    log_success "Single-server test completed successfully!"
}
657  
658  main "$@"