test_hardline_blocklist.py
"""Tests for the unconditional hardline command blocklist.

The hardline list sits underneath yolo as a floor: a short set of commands
that are catastrophic enough that the agent must never run them, no matter
whether --yolo, gateway /yolo, approvals.mode=off, or cron approve mode is
in effect.

Inspired by Mercury Agent's permission-hardened blocklist.
"""
import os

import pytest

from tools.approval import (
    DANGEROUS_PATTERNS,
    HARDLINE_PATTERNS,
    check_all_command_guards,
    check_dangerous_command,
    detect_dangerous_command,
    detect_hardline_command,
    disable_session_yolo,
    enable_session_yolo,
    reset_current_session_key,
    set_current_session_key,
)


# Session key installed by the ``clean_session`` fixture and reused by the
# tests that toggle session-scoped yolo.
_SESSION_KEY = "hardline_test"

# Canonical hardline command exercised by most of the integration tests.
_ROOT_WIPE = "rm -rf /"


# -------------------------------------------------------------------------
# Pattern detection
# -------------------------------------------------------------------------

# Every command here MUST be reported as hardline by the detector.
_HARDLINE_BLOCK = [
    # rm -rf aimed at root / system dirs / home
    "rm -rf /",
    "rm -rf /*",
    "rm -rf /home",
    "rm -rf /home/*",
    "rm -rf /etc",
    "rm -rf /usr",
    "rm -rf /var",
    "rm -rf /boot",
    "rm -rf /bin",
    "rm --recursive --force /",
    "rm -fr /",
    "sudo rm -rf /",
    "rm -rf ~",
    "rm -rf ~/",
    "rm -rf ~/*",
    "rm -rf $HOME",
    # Filesystem format
    "mkfs.ext4 /dev/sda1",
    "mkfs /dev/sdb",
    "mkfs.xfs /dev/nvme0n1",
    # Raw block device overwrites
    "dd if=/dev/zero of=/dev/sda bs=1M",
    "dd if=/dev/urandom of=/dev/nvme0n1",
    "dd if=anything of=/dev/hda",
    "echo bad > /dev/sda",
    "cat /dev/urandom > /dev/sdb",
    # Fork bomb
    ":(){ :|:& };:",
    # System-wide kill
    "kill -9 -1",
    "kill -1",
    # Shutdown / reboot / halt
    "shutdown -h now",
    "shutdown -r now",
    "sudo shutdown now",
    "reboot",
    "sudo reboot",
    "halt",
    "poweroff",
    "init 0",
    "init 6",
    "telinit 0",
    "systemctl poweroff",
    "systemctl reboot",
    "systemctl halt",
    # Compound / subshell variants
    "ls; reboot",
    "echo done && shutdown -h now",
    "false || halt",
    "$(reboot)",
    "`shutdown now`",
    "sudo -E shutdown now",
    "env FOO=1 reboot",
    "exec shutdown",
    "nohup reboot",
    "setsid poweroff",
]


# Look-alikes of the above that must NOT be reported as hardline.
_HARDLINE_ALLOW = [
    # rm on non-protected paths
    "rm -rf /tmp/foo",
    "rm -rf /tmp/*",
    "rm -rf ./build",
    "rm -rf node_modules",
    "rm -rf /home/user/scratch",  # subpath of /home, not /home itself
    "rm -rf ~/Downloads/old",
    "rm -rf $HOME/tmp",
    "rm foo.txt",
    "rm -rf some/path",
    # dd to regular files
    "dd if=/dev/zero of=./image.bin",
    "dd if=./data of=./backup.bin",
    # Redirect to regular files / non-block devices
    "echo done > /tmp/flag",
    "echo test > /dev/null",
    # Reading devices is fine
    "ls /dev/sda",
    "cat /dev/urandom | head -c 10",
    # Unrelated commands that merely contain a trigger word
    "grep 'shutdown' logs.txt",
    "echo reboot",
    "echo '# init 0 in comment'",
    "cat rebooting.log",
    "echo 'halt and catch fire'",
    "python3 -c 'print(\"shutdown\")'",
    "find . -name '*reboot*'",
    # Word-boundary protection
    "mkfs_helper --version",
    # systemctl non-destructive verbs
    "systemctl status nginx",
    "systemctl restart nginx",
    "systemctl stop nginx",
    "systemctl start nginx",
    # Targeted kill
    "kill -9 12345",
    "kill -HUP 1234",
    "pkill python",
    # Ordinary ops
    "git status",
    "npm run build",
    "sudo apt update",
    "curl https://example.com | head",
]


def _expect_hardline_block(result):
    """Assert that *result* is a denial that carries the hardline marker."""
    assert result["approved"] is False
    assert result.get("hardline") is True


@pytest.mark.parametrize("command", _HARDLINE_BLOCK)
def test_hardline_detection_blocks(command):
    """Each ``_HARDLINE_BLOCK`` entry is detected and comes with a description."""
    matched, desc = detect_hardline_command(command)
    assert matched, f"expected hardline to match {command!r}"
    assert desc, "hardline match must provide a description"


@pytest.mark.parametrize("command", _HARDLINE_ALLOW)
def test_hardline_detection_allows(command):
    """Each ``_HARDLINE_ALLOW`` entry is not detected and yields no description."""
    matched, desc = detect_hardline_command(command)
    assert not matched, f"expected hardline NOT to match {command!r} (got: {desc})"
    assert desc is None


# -------------------------------------------------------------------------
# Integration with the approval flow
# -------------------------------------------------------------------------

@pytest.fixture
def clean_session(monkeypatch):
    """Reset session-scoped approval state around each test.

    Clears the HERMES_* environment switches, installs the test session key
    with session yolo disabled, and undoes both once the test has finished.
    """
    for env_name in (
        "HERMES_YOLO_MODE",
        "HERMES_INTERACTIVE",
        "HERMES_GATEWAY_SESSION",
        "HERMES_CRON_SESSION",
        "HERMES_EXEC_ASK",
    ):
        monkeypatch.delenv(env_name, raising=False)

    key_token = set_current_session_key(_SESSION_KEY)
    try:
        disable_session_yolo(_SESSION_KEY)
        yield
    finally:
        # Runs even when the test body raised, so no state leaks onward.
        disable_session_yolo(_SESSION_KEY)
        reset_current_session_key(key_token)


def test_check_dangerous_command_blocks_hardline(clean_session):
    """``check_dangerous_command`` denies a hardline command and says so."""
    outcome = check_dangerous_command(_ROOT_WIPE, "local")
    _expect_hardline_block(outcome)
    assert "BLOCKED (hardline)" in outcome["message"]


def test_check_all_command_guards_blocks_hardline(clean_session):
    """``check_all_command_guards`` denies a hardline command and says so."""
    outcome = check_all_command_guards(_ROOT_WIPE, "local")
    _expect_hardline_block(outcome)
    assert "BLOCKED (hardline)" in outcome["message"]


def test_yolo_env_var_cannot_bypass_hardline(clean_session, monkeypatch):
    """HERMES_YOLO_MODE=1 must not bypass the hardline floor."""
    monkeypatch.setenv("HERMES_YOLO_MODE", "1")

    for cmd in ("rm -rf /", "shutdown -h now", "mkfs.ext4 /dev/sda", "reboot"):
        direct = check_dangerous_command(cmd, "local")
        assert direct["approved"] is False, f"yolo leaked hardline on {cmd!r} (check_dangerous_command)"
        assert direct.get("hardline") is True

        combined = check_all_command_guards(cmd, "local")
        assert combined["approved"] is False, f"yolo leaked hardline on {cmd!r} (check_all_command_guards)"
        assert combined.get("hardline") is True


def test_session_yolo_cannot_bypass_hardline(clean_session):
    """Gateway /yolo (session-scoped) must not bypass the hardline floor."""
    enable_session_yolo(_SESSION_KEY)

    _expect_hardline_block(check_dangerous_command(_ROOT_WIPE, "local"))
    _expect_hardline_block(check_all_command_guards(_ROOT_WIPE, "local"))


def test_approvals_mode_off_cannot_bypass_hardline(clean_session, monkeypatch, tmp_path):
    """config approvals.mode=off (yolo-equivalent) must not bypass hardline."""
    # Rather than writing a hermes config, replace the helper that reports
    # the approval mode so that it answers "off".
    import tools.approval as approval_module
    monkeypatch.setattr(approval_module, "_get_approval_mode", lambda: "off")

    _expect_hardline_block(check_all_command_guards(_ROOT_WIPE, "local"))


def test_cron_approve_mode_cannot_bypass_hardline(clean_session, monkeypatch):
    """Cron sessions with cron_mode=approve must not bypass hardline."""
    monkeypatch.setenv("HERMES_CRON_SESSION", "1")
    import tools.approval as approval_module
    monkeypatch.setattr(approval_module, "_get_cron_approval_mode", lambda: "approve")

    _expect_hardline_block(check_all_command_guards(_ROOT_WIPE, "local"))


def test_container_backends_still_bypass(clean_session):
    """Containerized backends remain bypass-approved — they can't touch the host.

    Hardline only protects environments with real host impact (local, ssh).
    """
    container_envs = ("docker", "singularity", "modal", "daytona", "vercel_sandbox")
    for env in container_envs:
        direct = check_dangerous_command(_ROOT_WIPE, env)
        assert direct["approved"] is True, f"container {env} should still bypass"
        combined = check_all_command_guards(_ROOT_WIPE, env)
        assert combined["approved"] is True, f"container {env} should still bypass"


def test_hardline_runs_before_dangerous_detection(clean_session):
    """A hardline command yields the hardline block, not a dangerous-approval prompt."""
    # `rm -rf /` is matched by both lists; the hardline verdict has to win.
    flagged, _, _ = detect_dangerous_command(_ROOT_WIPE)
    assert flagged, "precondition: rm -rf / is also in DANGEROUS_PATTERNS"

    outcome = check_dangerous_command(_ROOT_WIPE, "local")
    assert outcome.get("hardline") is True


def test_recoverable_dangerous_commands_still_pass_yolo(clean_session, monkeypatch):
    """Yolo still bypasses the regular DANGEROUS_PATTERNS list.

    Confirms that the yolo escape hatch was narrowed rather than broken.
    """
    monkeypatch.setenv("HERMES_YOLO_MODE", "1")

    # Dangerous but NOT hardline: yolo is expected to let these through.
    recoverable = ("rm -rf /tmp/x", "chmod -R 777 .", "git reset --hard", "git push --force")
    for cmd in recoverable:
        # Sanity check: the command is still classified as dangerous...
        flagged, _, _ = detect_dangerous_command(cmd)
        assert flagged, f"precondition: {cmd!r} should be in DANGEROUS_PATTERNS"
        # ...but the hardline detector leaves it alone...
        matched, _ = detect_hardline_command(cmd)
        assert not matched, f"{cmd!r} should not be hardline"
        # ...so with yolo enabled the dangerous check approves it.
        outcome = check_dangerous_command(cmd, "local")
        assert outcome["approved"] is True, f"yolo should have bypassed {cmd!r}"


def test_hardline_list_is_small():
    """Hardline list stays focused on unrecoverable commands only.

    The assertion caps HARDLINE_PATTERNS at 20 entries. If a new pattern
    would push past that, reconsider — it probably belongs in
    DANGEROUS_PATTERNS, where yolo can still bypass it.
    """
    max_entries = 20
    assert len(HARDLINE_PATTERNS) <= max_entries, (
        f"HARDLINE_PATTERNS has grown to {len(HARDLINE_PATTERNS)} entries; "
        "only truly unrecoverable commands belong here."
    )