/ infra / machine / ci-runner.cspec
ci-runner.cspec
  1  # CI High-Performance Runner Setup
  2  # phase: 3
  3  # human_doc: devops/human/ALPHA-DELTA_CI_Setup_Guide_v2.md
  4  # updated: 2026-01-06
  5  # status: OPERATIONAL
  6  
  7  # === DEPLOYED CONFIG ===
  8  hostname: ci.ac-dc.network
  9  runner_name: ci-runner
 10  runner_version: v12.4.0
 11  
 12  # === SYSTEM PACKAGES ===
 13  packages:
 14    - git, curl, wget, htop, iotop
 15    - build-essential, pkg-config, libssl-dev
 16    - clang, llvm, lld
 17    - docker.io (v29.1.3)
 18    - jq, gnupg, unzip
 19  
 20  # === KERNEL TUNING (APPLIED) ===
 21  sysctl:
 22    file: /etc/sysctl.d/99-ci-performance.conf
 23    values:
 24      vm.swappiness: 10
 25      fs.inotify.max_user_watches: 524288
 26      fs.inotify.max_user_instances: 512
 27      net.core.somaxconn: 65535
 28      net.ipv4.tcp_max_syn_backlog: 65535
 29      fs.file-max: 2097152
 30  
 31  limits:
 32    file: /etc/security/limits.d/ci-limits.conf
 33    nofile_soft: 1048576
 34    nofile_hard: 1048576
 35    nproc: unlimited
 36  
 37  # === STORAGE LAYOUT ===
 38  storage:
 39    volume: /dev/sda (500GB_DO_Block_Storage)
 40    mount: /opt/ci
 41    fs: ext4
 42    options: defaults,noatime
 43    dirs:
 44      - /opt/ci/workspaces (build_dirs)
 45      - /opt/ci/cache (runner_cache)
 46      - /opt/ci/sccache (30GB_compilation_cache)
 47      - /opt/ci/artifacts (build_outputs)
 48      - /opt/ci/ramdisk (16GB_tmpfs)
 49  
 50  tmpfs:
 51    path: /opt/ci/ramdisk
 52    size: 16G
 53    options: defaults,mode=1777
 54  
 55  # === RUST TOOLCHAIN (INSTALLED) ===
 56  # Note: Two Rust installations exist. The systemd runner services use the one under /home/devops; interactive shells use the one under /opt/rust.
 57  rust:
 58    # Used by the systemd services (forgejo-runner-{N}.service, N = 1..6)
 59    service_rustup_home: /home/devops/.rustup
 60    service_cargo_home: /home/devops/.cargo
 61    # Used by interactive shells
 62    shell_rustup_home: /opt/rust/rustup
 63    shell_cargo_home: /opt/rust/cargo
 64    shell_profile: /etc/profile.d/rust.sh
 65    installed:
 66      stable: 1.92.0 (ded5c06cf 2025-12-08)
 67      nightly: 1.94.0-nightly (e29fcf45e 2026-01-04)
 68    default: stable
 69  
 70  # === BUILD TOOLS (INSTALLED) ===
 71  tools:
 72    mold:
 73      version: 2.35.1
 74      path: /usr/local/bin/mold
 75      purpose: 10-20x_faster_linking
 76  
 77    sccache:
 78      version: 0.8.2
 79      path: /usr/local/bin/sccache
 80      cache_dir: /opt/ci/sccache
 81      cache_size: 30GB
 82  
 83  # === CARGO CONFIG ===
 84  cargo_config:
 85    # Shell config (interactive use)
 86    shell_file: /opt/rust/cargo/config.toml
 87    content: |
 88      [build]
 89      jobs = 28
 90      rustflags = ["-C", "link-arg=-fuse-ld=mold", "-C", "target-cpu=native"]
 91  
 92      [target.x86_64-unknown-linux-gnu]
 93      linker = "clang"
 94      rustflags = ["-C", "link-arg=-fuse-ld=mold", "-C", "target-cpu=native"]
 95  
 96      [registries.crates-io]
 97      protocol = "sparse"
 98  
 99      [net]
100      git-fetch-with-cli = true
101  
102  # === ENVIRONMENT ===
103  # Shell environment (interactive)
104  shell_environment:
105    file: /etc/profile.d/rust.sh
106    vars:
107      RUSTUP_HOME: /opt/rust/rustup
108      CARGO_HOME: /opt/rust/cargo
109      PATH: $CARGO_HOME/bin:$PATH
110      SCCACHE_DIR: /opt/ci/sccache
111      SCCACHE_CACHE_SIZE: 30G
112      RUSTC_WRAPPER: sccache
113  
114  # Systemd service environment (runner)
115  service_environment:
116    vars:
117      HOME: /home/devops
118      PATH: /home/devops/.cargo/bin:/usr/local/bin:/usr/bin:/bin
119      RUSTC_WRAPPER: sccache
120      SCCACHE_DIR: /opt/ci/sccache
121      CARGO_INCREMENTAL: 0
122  
123  # === FORGEJO-RUNNER (6 PARALLEL INSTANCES) ===
124  runner:
125    path: /usr/local/bin/forgejo-runner
126    version: v12.4.0
127    instances: 6
128    config_dir: /var/lib/forgejo-runner
129  
130    # Each instance has own config in /var/lib/forgejo-runner/runner-{N}/
131    instance_configs:
132      - /var/lib/forgejo-runner/runner-1/config.yaml
133      - /var/lib/forgejo-runner/runner-2/config.yaml
134      - /var/lib/forgejo-runner/runner-3/config.yaml
135      - /var/lib/forgejo-runner/runner-4/config.yaml
136      - /var/lib/forgejo-runner/runner-5/config.yaml
137      - /var/lib/forgejo-runner/runner-6/config.yaml
138  
139    settings:
140      capacity: 1 (per_instance, 6_total)
141      timeout: 3h
142      fetch_interval: 2s
143  
144    labels:
145      - native:host
146      - rust-native:host
147      - linux-x64:host
148  
149    cache:
150      enabled: true
151      dir: /opt/ci/cache
152  
153    host:
154      workdir_parent: /opt/ci/workspaces
155  
156  # === SYSTEMD SERVICES (6 INSTANCES) ===
157  systemd:
158    services:
159      - forgejo-runner-1.service
160      - forgejo-runner-2.service
161      - forgejo-runner-3.service
162      - forgejo-runner-4.service
163      - forgejo-runner-5.service
164      - forgejo-runner-6.service
165    exec_start_template: /usr/local/bin/forgejo-runner daemon --config /var/lib/forgejo-runner/runner-{N}/config.yaml
166    status: active (running)
167  
168    # Health check note: Check individual services, not forgejo-runner.service
169    check_command: systemctl list-units 'forgejo-runner-*.service' --state=active
170  
171  # === VERIFICATION COMMANDS ===
172  verify:
173    rust: source /etc/profile.d/rust.sh && rustc --version
174    mold: mold --version
175    sccache: sccache --show-stats
176    runner: systemctl status 'forgejo-runner-*.service'
177    capacity: grep capacity /var/lib/forgejo-runner/runner-*/config.yaml
178    kernel: sysctl vm.swappiness fs.inotify.max_user_watches
179    storage: df -h /opt/ci /opt/ci/ramdisk
180  
181  # === HEALTH CHECK (Internal) ===
182  health_script: /usr/local/bin/runner-health.sh
183  
184  # === HEALTH API (Public) ===
185  # Public JSON endpoint for remote monitoring
186  health_api:
187    status: PENDING_DEPLOY
188    endpoint: https://ci.ac-dc.network/health/status
189    format: JSON
190    update_interval: 60s (cron)
191  
192    components:
193      script: /usr/local/bin/runner-health-json.sh
194      output: /var/www/health/status.json
195      cron: /etc/cron.d/runner-health
196      caddy: /etc/caddy/Caddyfile (ci.ac-dc.network section)
197  
198    source_files:
199      script: scripts/runner-health-json.sh
200      cron: scripts/deploy/runner-health.cron
201      caddy: scripts/deploy/caddy-health.snippet
202  
203    response_fields:
204      - timestamp (ISO8601)
205      - runner.status (active|inactive|unknown)
206      - runner.uptime (Xd Xh Xm)
207      - runner.jobs_last_hour (int)
208      - system.disk_free_gb (int)
209      - system.disk_used_pct (int)
210      - system.load_avg (string)
211      - system.memory_free_gb (int)
212      - connectivity.forgejo_reachable (bool)
213      - sccache.hits (int)
214      - sccache.misses (int)
215  
216    deploy_commands: |
217      # 1. Copy script
218      scp -P 2584 scripts/runner-health-json.sh devops@ci.ac-dc.network:/tmp/
219      ssh -p 2584 devops@ci.ac-dc.network "sudo mv /tmp/runner-health-json.sh /usr/local/bin/ && sudo chmod +x /usr/local/bin/runner-health-json.sh"
220  
221      # 2. Create output directory
222      ssh -p 2584 devops@ci.ac-dc.network "sudo mkdir -p /var/www/health && sudo chown root:root /var/www/health"
223  
224      # 3. Install cron
225      scp -P 2584 scripts/deploy/runner-health.cron devops@ci.ac-dc.network:/tmp/
226      ssh -p 2584 devops@ci.ac-dc.network "sudo mv /tmp/runner-health.cron /etc/cron.d/runner-health && sudo chmod 644 /etc/cron.d/runner-health"
227  
228      # 4. Update Caddy config (merge with existing)
229      # Manual: merge scripts/deploy/caddy-health.snippet into the ci.ac-dc.network section of /etc/caddy/Caddyfile, then reload
230      ssh -p 2584 devops@ci.ac-dc.network "sudo systemctl reload caddy"
231  
232      # 5. Test
233      curl https://ci.ac-dc.network/health/status