ci-maintenance.cspec
# CI Monitoring & Maintenance
# phase: 6
# human_doc: devops/human/ALPHA-DELTA_CI_Setup_Guide_v2.md
# updated: 2026-01-22
# status: INSTALLED

# === RUNNER HEALTH SCRIPT (INSTALLED) ===
# One-shot health report for the CI runner host; each check maps a label to
# the shell command that produces it.
health_script:
  path: /usr/local/bin/runner-health.sh
  installed: 2026-01-05
  run: sudo /usr/local/bin/runner-health.sh
  checks:
    - cpu_usage: top -bn1 | head -5
    - memory: free -h
    - disk: df -h /opt/ci
    - load_average: uptime
    - sccache_stats: sccache --show-stats
    - runner_status: systemctl is-active forgejo-runner
    - active_builds: pgrep -a cargo

# === DAILY CLEANUP (INSTALLED) ===
# Age-based cleanup run from cron.daily; thresholds are in days.
cleanup_cron:
  path: /etc/cron.daily/ci-cleanup
  installed: 2026-01-05
  log: /var/log/ci-cleanup.log
  tasks:
    - find_delete: /opt/ci/workspaces target dirs (mtime +7)
    - sccache_trim: if_over_25GB, delete atime +14
    - artifacts_cleanup: /opt/ci/artifacts (mtime +30)
    - cargo_registry: /home/devops/.cargo/registry/cache (atime +30)

# === DISK CLEANUP WATCHER (INSTALLED) ===
# Threshold-driven cleanup service that complements the daily cron job above.
disk_cleanup_watcher:
  installed: 2026-01-22
  purpose: auto_cleanup_when_disk_reaches_threshold
  threshold: 85%
  check_interval: 300s  # 5 minutes
  script: /home/devops/scripts/disk-cleanup-watch.sh
  log: /var/log/disk-cleanup-watch.log
  service: disk-cleanup-watch.service
  behavior:
    - monitors_root_partition_usage
    - triggers_cargo_clean_on_all_repos_when_threshold_exceeded
    - cleans_cargo_registry_cache
    - logs_all_actions_with_timestamps
  repos_cleaned:
    - alphaos
    - deltaos
    - adl
    - alphavm
    - adnet
    - deltavm
    - ac-dc
    - acdc-core
    - wallet-core
  systemd:
    type: simple
    user: devops
    restart: always
    restart_sec: 10
    enabled: true  # starts on boot
  commands:
    status: sudo systemctl status disk-cleanup-watch
    restart: sudo systemctl restart disk-cleanup-watch
    stop: sudo systemctl stop disk-cleanup-watch
    logs_follow: journalctl -u disk-cleanup-watch -f
    logs_file: cat /var/log/disk-cleanup-watch.log
    manual_trigger: |
      # Force cleanup regardless of threshold
      # Repo list must stay in sync with repos_cleaned above.
      for repo in alphaos deltaos adl alphavm adnet deltavm ac-dc acdc-core wallet-core; do
        cd /home/devops/working-repos/$repo && cargo clean
      done

# === FORGEJO BACKUP ===
backup_script:
  path: /usr/local/bin/backup-forgejo.sh
  dir: /var/backups/forgejo
  retention: 14_days
  tasks:
    - pg_dump forgejo -> db-$DATE.sql.gz
    - rsync: /var/lib/forgejo/repositories -> repos-latest/
    - tar: app.ini, nginx, systemd -> config-$DATE.tar.gz

# === MAINTENANCE CHECKLISTS ===
weekly:
  - check_runner_disk: df -h /opt/ci
  - review_sccache_hit_rate: sccache --show-stats
  - check_forgejo_logs: journalctl -u forgejo --since "1 week ago" | grep -i error
  - verify_backup_integrity

monthly:
  - update_forgejo_binary
  - update_forgejo_runner_binary
  - update_rust: rustup update
  - update_mold_if_available
  - rotate_secrets_tokens
  - test_cert_renewal: certbot renew --dry-run

quarterly:
  - security_audit: cargo audit + CVE review
  - backup_restoration_test
  - performance_baseline_comparison
  - optimize_ci_pipeline_times
  - cleanup_old_s3_artifacts

# === EXPECTED BUILD TIMES (32-core) ===
build_times:
  cargo_check: cold:45s, warm:15s, incremental:5s
  cargo_test_all: cold:3min, warm:1min, incremental:30s
  cargo_build_release_single: cold:2min, warm:45s, incremental:20s
  full_release_alpha_delta: cold:5min, warm:2min, incremental:1min
  integration_tests: cold:4min, warm:2min, incremental:1min
  complete_ci: cold:12min, warm:6min, incremental:4min

# === RESOURCE TARGETS ===
# Format: healthy range, then the alert condition in parentheses.
targets:
  cpu_during_build: 80-95% (alert: <50%)
  memory: 40-70% (alert: >85%)
  disk_root: <85% (auto_cleanup_at_85%, alert: >90%)
  disk_io: nvme_saturated_ok (alert: >95% for >5min)
  sccache_hit_rate: >70% (alert: <50%)
  job_queue: <3_pending (alert: >8_pending)

# === TROUBLESHOOTING ===
troubleshoot:
  slow_builds:
    - check_sccache: sccache --show-stats | grep "Cache hits"
    - verify_mold: readelf -p .comment target/release/binary | grep mold
    - check_io: iostat -x 1 5
    # pgrep -c counts matching processes directly; a `ps | grep | wc -l`
    # pipeline would also count the grep process itself.
    - monitor_parallelism: watch -n1 'pgrep -c rustc'

  runner_not_picking_jobs:
    - check_logs: journalctl -u forgejo-runner -n 50 | grep -i error
    - verify_network: curl -I https://ci.yourdomain.com/api/v1/version
    - re-register: stop, rm .runner, register, start

  oom_during_compilation:
    - reduce_parallelism: CARGO_BUILD_JOBS=16 cargo build --release
    # mkswap and swapon each need the swap file path; chmod 600 keeps the
    # swap file from being readable by other users. Run as root.
    - enable_swap: fallocate -l 16G /swapfile && chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile

# === COST ESTIMATE (Monthly) ===
cost:
  forgejo_droplet: $12
  runner_droplet: ~$336
  do_spaces_100gb: ~$5
  reserved_ip: $4
  bandwidth_500gb: ~$45
  total: ~$402/month

# === PERFORMANCE IMPROVEMENTS ===
improvements:
  full_ci_cycle: baseline:25min -> optimized:6min (4x)
  parallel_jobs: baseline:2 -> optimized:8 (4x)
  sccache_hit_rate: baseline:40% -> optimized:75%
  release_build: baseline:15min -> optimized:2min (7x)