/ tests / tools / test_hardline_blocklist.py
test_hardline_blocklist.py
  1  """Tests for the unconditional hardline command blocklist.
  2  
  3  The hardline list is a floor below yolo: a small set of commands so
  4  catastrophic they should never run via the agent, regardless of --yolo,
  5  gateway /yolo, approvals.mode=off, or cron approve mode.
  6  
  7  Inspired by Mercury Agent's permission-hardened blocklist.
  8  """
  9  import os
 10  
 11  import pytest
 12  
 13  from tools.approval import (
 14      DANGEROUS_PATTERNS,
 15      HARDLINE_PATTERNS,
 16      check_all_command_guards,
 17      check_dangerous_command,
 18      detect_dangerous_command,
 19      detect_hardline_command,
 20      disable_session_yolo,
 21      enable_session_yolo,
 22      reset_current_session_key,
 23      set_current_session_key,
 24  )
 25  
 26  
 27  # -------------------------------------------------------------------------
 28  # Pattern detection
 29  # -------------------------------------------------------------------------
 30  
 31  # Commands that MUST be hardline-blocked.
 32  _HARDLINE_BLOCK = [
 33      # rm -rf targeting root / system dirs / home
 34      "rm -rf /",
 35      "rm -rf /*",
 36      "rm -rf /home",
 37      "rm -rf /home/*",
 38      "rm -rf /etc",
 39      "rm -rf /usr",
 40      "rm -rf /var",
 41      "rm -rf /boot",
 42      "rm -rf /bin",
 43      "rm --recursive --force /",
 44      "rm -fr /",
 45      "sudo rm -rf /",
 46      "rm -rf ~",
 47      "rm -rf ~/",
 48      "rm -rf ~/*",
 49      "rm -rf $HOME",
 50      # Filesystem format
 51      "mkfs.ext4 /dev/sda1",
 52      "mkfs /dev/sdb",
 53      "mkfs.xfs /dev/nvme0n1",
 54      # Raw block device overwrites
 55      "dd if=/dev/zero of=/dev/sda bs=1M",
 56      "dd if=/dev/urandom of=/dev/nvme0n1",
 57      "dd if=anything of=/dev/hda",
 58      "echo bad > /dev/sda",
 59      "cat /dev/urandom > /dev/sdb",
 60      # Fork bomb
 61      ":(){ :|:& };:",
 62      # System-wide kill
 63      "kill -9 -1",
 64      "kill -1",
 65      # Shutdown / reboot / halt
 66      "shutdown -h now",
 67      "shutdown -r now",
 68      "sudo shutdown now",
 69      "reboot",
 70      "sudo reboot",
 71      "halt",
 72      "poweroff",
 73      "init 0",
 74      "init 6",
 75      "telinit 0",
 76      "systemctl poweroff",
 77      "systemctl reboot",
 78      "systemctl halt",
 79      # Compound / subshell variants
 80      "ls; reboot",
 81      "echo done && shutdown -h now",
 82      "false || halt",
 83      "$(reboot)",
 84      "`shutdown now`",
 85      "sudo -E shutdown now",
 86      "env FOO=1 reboot",
 87      "exec shutdown",
 88      "nohup reboot",
 89      "setsid poweroff",
 90  ]
 91  
 92  
 93  # Commands that look superficially similar but must NOT be hardline-blocked.
 94  _HARDLINE_ALLOW = [
 95      # rm on non-protected paths
 96      "rm -rf /tmp/foo",
 97      "rm -rf /tmp/*",
 98      "rm -rf ./build",
 99      "rm -rf node_modules",
100      "rm -rf /home/user/scratch",  # subpath of /home, not /home itself
101      "rm -rf ~/Downloads/old",
102      "rm -rf $HOME/tmp",
103      "rm foo.txt",
104      "rm -rf some/path",
105      # dd to regular files
106      "dd if=/dev/zero of=./image.bin",
107      "dd if=./data of=./backup.bin",
108      # Redirect to regular files / non-block devices
109      "echo done > /tmp/flag",
110      "echo test > /dev/null",
111      # Reading devices is fine
112      "ls /dev/sda",
113      "cat /dev/urandom | head -c 10",
114      # Unrelated commands that happen to contain the trigger word
115      "grep 'shutdown' logs.txt",
116      "echo reboot",
117      "echo '# init 0 in comment'",
118      "cat rebooting.log",
119      "echo 'halt and catch fire'",
120      "python3 -c 'print(\"shutdown\")'",
121      "find . -name '*reboot*'",
122      # Word-boundary protection
123      "mkfs_helper --version",
124      # systemctl non-destructive verbs
125      "systemctl status nginx",
126      "systemctl restart nginx",
127      "systemctl stop nginx",
128      "systemctl start nginx",
129      # targeted kill
130      "kill -9 12345",
131      "kill -HUP 1234",
132      "pkill python",
133      # Ordinary ops
134      "git status",
135      "npm run build",
136      "sudo apt update",
137      "curl https://example.com | head",
138  ]
139  
140  
@pytest.mark.parametrize("command", _HARDLINE_BLOCK)
def test_hardline_detection_blocks(command):
    """Each ``_HARDLINE_BLOCK`` entry is detected and comes with a description."""
    matched, description = detect_hardline_command(command)
    assert matched, f"expected hardline to match {command!r}"
    # A match without a human-readable reason is treated as a failure too.
    assert description, "hardline match must provide a description"
146  
147  
@pytest.mark.parametrize("command", _HARDLINE_ALLOW)
def test_hardline_detection_allows(command):
    """Each ``_HARDLINE_ALLOW`` entry is left unmatched, with no description."""
    matched, description = detect_hardline_command(command)
    assert not matched, f"expected hardline NOT to match {command!r} (got: {description})"
    # A non-match must report ``None`` rather than an empty string.
    assert description is None
153  
154  
155  # -------------------------------------------------------------------------
156  # Integration with the approval flow
157  # -------------------------------------------------------------------------
158  
@pytest.fixture
def clean_session(monkeypatch):
    """Give the test a known approval state and undo it afterwards.

    Removes the HERMES_* environment variables listed below, makes
    ``"hardline_test"`` the current session key, and turns session yolo off
    for that key both before the test body runs and after it finishes. The
    previous session key is restored on teardown.
    """
    session_key = "hardline_test"

    for env_name in (
        "HERMES_YOLO_MODE",
        "HERMES_INTERACTIVE",
        "HERMES_GATEWAY_SESSION",
        "HERMES_CRON_SESSION",
        "HERMES_EXEC_ASK",
    ):
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv(env_name, raising=False)

    token = set_current_session_key(session_key)
    try:
        disable_session_yolo(session_key)
        yield
    finally:
        # A test may have enabled session yolo; switch it off again before
        # handing the session key back.
        disable_session_yolo(session_key)
        reset_current_session_key(token)
174  
175  
def test_check_dangerous_command_blocks_hardline(clean_session):
    """``check_dangerous_command`` rejects ``rm -rf /`` in the ``local`` environment.

    The result must be unapproved, flagged ``hardline``, and carry the
    ``BLOCKED (hardline)`` marker in its message.
    """
    verdict = check_dangerous_command("rm -rf /", "local")
    assert verdict["approved"] is False
    assert verdict.get("hardline") is True
    assert "BLOCKED (hardline)" in verdict["message"]
181  
182  
def test_check_all_command_guards_blocks_hardline(clean_session):
    """``check_all_command_guards`` rejects ``rm -rf /`` in the ``local`` environment.

    The result must be unapproved, flagged ``hardline``, and carry the
    ``BLOCKED (hardline)`` marker in its message.
    """
    verdict = check_all_command_guards("rm -rf /", "local")
    assert verdict["approved"] is False
    assert verdict.get("hardline") is True
    assert "BLOCKED (hardline)" in verdict["message"]
188  
189  
def test_yolo_env_var_cannot_bypass_hardline(clean_session, monkeypatch):
    """Setting ``HERMES_YOLO_MODE=1`` does not lift the hardline block.

    A handful of hardline commands are pushed through both guard entry
    points; each must come back unapproved and flagged ``hardline``.
    """
    monkeypatch.setenv("HERMES_YOLO_MODE", "1")

    hardline_samples = ("rm -rf /", "shutdown -h now", "mkfs.ext4 /dev/sda", "reboot")
    for cmd in hardline_samples:
        via_dangerous = check_dangerous_command(cmd, "local")
        assert via_dangerous["approved"] is False, (
            f"yolo leaked hardline on {cmd!r} (check_dangerous_command)"
        )
        assert via_dangerous.get("hardline") is True

        via_all_guards = check_all_command_guards(cmd, "local")
        assert via_all_guards["approved"] is False, (
            f"yolo leaked hardline on {cmd!r} (check_all_command_guards)"
        )
        assert via_all_guards.get("hardline") is True
202  
203  
def test_session_yolo_cannot_bypass_hardline(clean_session):
    """Session-scoped yolo (gateway ``/yolo``) does not lift the hardline block."""
    enable_session_yolo("hardline_test")

    # Both entry points must refuse, in the same way.
    for guard in (check_dangerous_command, check_all_command_guards):
        verdict = guard("rm -rf /", "local")
        assert verdict["approved"] is False
        assert verdict.get("hardline") is True
215  
216  
def test_approvals_mode_off_cannot_bypass_hardline(clean_session, monkeypatch):
    """Config ``approvals.mode=off`` (yolo-equivalent) must not bypass hardline.

    The approval mode is forced to ``"off"`` by replacing the module's
    ``_get_approval_mode`` helper, then ``rm -rf /`` is sent through
    ``check_all_command_guards``; it must still be refused and flagged
    ``hardline``.
    """
    # _get_approval_mode() reads from hermes config; patching the helper is
    # simpler than fabricating a config for the test. Imported locally so
    # the patch targets the module object itself.
    import tools.approval as approval_mod

    monkeypatch.setattr(approval_mod, "_get_approval_mode", lambda: "off")

    result = check_all_command_guards("rm -rf /", "local")
    assert result["approved"] is False
    assert result.get("hardline") is True
226  
227  
def test_cron_approve_mode_cannot_bypass_hardline(clean_session, monkeypatch):
    """A cron session running with cron mode ``approve`` still hits the hardline block.

    ``HERMES_CRON_SESSION`` is set and the module's
    ``_get_cron_approval_mode`` helper is replaced so that it reports
    ``"approve"``.
    """
    import tools.approval as approval_module

    monkeypatch.setenv("HERMES_CRON_SESSION", "1")
    monkeypatch.setattr(
        approval_module, "_get_cron_approval_mode", lambda: "approve"
    )

    verdict = check_all_command_guards("rm -rf /", "local")
    assert verdict["approved"] is False
    assert verdict.get("hardline") is True
237  
238  
def test_container_backends_still_bypass(clean_session):
    """Container-style environments keep approving ``rm -rf /``.

    The hardline floor is meant for environments with real host impact
    (local, ssh); containerized backends can't touch the host, so both
    guard entry points are expected to approve there.
    """
    container_envs = ("docker", "singularity", "modal", "daytona", "vercel_sandbox")
    for backend in container_envs:
        for guard in (check_dangerous_command, check_all_command_guards):
            verdict = guard("rm -rf /", backend)
            assert verdict["approved"] is True, f"container {backend} should still bypass"
249  
250  
def test_hardline_runs_before_dangerous_detection(clean_session):
    """When a command is both dangerous and hardline, the hardline result wins.

    The command should come back as a hardline block, not as an ordinary
    dangerous-command approval prompt.
    """
    command = "rm -rf /"

    # Precondition: the command really does sit on both lists.
    flagged_dangerous, _, _ = detect_dangerous_command(command)
    assert flagged_dangerous, "precondition: rm -rf / is also in DANGEROUS_PATTERNS"

    verdict = check_dangerous_command(command, "local")
    assert verdict.get("hardline") is True
259  
260  
def test_recoverable_dangerous_commands_still_pass_yolo(clean_session, monkeypatch):
    """Yolo keeps working for commands that are dangerous but not hardline.

    Guards against the hardline floor swallowing the regular
    DANGEROUS_PATTERNS escape hatch: with ``HERMES_YOLO_MODE=1`` these
    commands must still be approved.
    """
    monkeypatch.setenv("HERMES_YOLO_MODE", "1")

    recoverable = ("rm -rf /tmp/x", "chmod -R 777 .", "git reset --hard", "git push --force")
    for cmd in recoverable:
        # Precondition 1: the regular detector still flags it.
        flagged_dangerous, _, _ = detect_dangerous_command(cmd)
        assert flagged_dangerous, f"precondition: {cmd!r} should be in DANGEROUS_PATTERNS"

        # Precondition 2: the hardline detector does not.
        flagged_hardline, _ = detect_hardline_command(cmd)
        assert not flagged_hardline, f"{cmd!r} should not be hardline"

        # With yolo on, the dangerous check lets it through.
        verdict = check_dangerous_command(cmd, "local")
        assert verdict["approved"] is True, f"yolo should have bypassed {cmd!r}"
279  
280  
def test_hardline_list_is_small():
    """Keep ``HARDLINE_PATTERNS`` limited to unrecoverable commands.

    The list may hold at most 20 entries. If a new pattern would push it
    past that, it most likely belongs in DANGEROUS_PATTERNS instead, where
    yolo can still bypass it.
    """
    pattern_count = len(HARDLINE_PATTERNS)
    too_big_message = (
        f"HARDLINE_PATTERNS has grown to {pattern_count} entries; "
        "only truly unrecoverable commands belong here."
    )
    assert pattern_count <= 20, too_big_message