__init__.py
1 """disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files. 2 3 Wires three behaviours: 4 5 1. ``post_tool_call`` hook — inspects ``write_file`` and ``terminal`` 6 tool results for newly-created paths matching test/temp patterns 7 under ``HERMES_HOME`` and tracks them silently. Zero agent 8 compliance required. 9 10 2. ``on_session_end`` hook — when any test files were auto-tracked 11 during the just-finished turn, runs :func:`disk_cleanup.quick` and 12 logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``. 13 14 3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``, 15 ``quick``, ``deep``, ``track``, ``forget``. 16 17 Replaces PR #12212's skill-plus-script design: the agent no longer 18 needs to remember to run commands. 19 """ 20 21 from __future__ import annotations 22 23 import logging 24 import re 25 import shlex 26 import threading 27 from pathlib import Path 28 from typing import Any, Dict, Optional, Set 29 30 from . import disk_cleanup as dg 31 32 logger = logging.getLogger(__name__) 33 34 35 # Per-task set of "test files newly tracked this turn". Keyed by task_id 36 # (or session_id as fallback) so on_session_end can decide whether to run 37 # cleanup. Guarded by a lock — post_tool_call can fire concurrently on 38 # parallel tool calls. 39 _recent_test_tracks: Dict[str, Set[str]] = {} 40 _lock = threading.Lock() 41 42 43 # Tool-call result shapes we can parse 44 _WRITE_FILE_PATH_KEY = "path" 45 _TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)") 46 47 48 # --------------------------------------------------------------------------- 49 # Helpers 50 # --------------------------------------------------------------------------- 51 52 def _tracker_key(task_id: str, session_id: str) -> str: 53 return task_id or session_id or "default" 54 55 56 def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None: 57 """Record that we tracked *path* as *category* during this turn.""" 58 if category != "test": 59 return 60 key = _tracker_key(task_id, session_id) 61 with _lock: 62 _recent_test_tracks.setdefault(key, set()).add(str(path)) 63 64 65 def _drain(task_id: str, session_id: str) -> Set[str]: 66 """Pop the set of test paths tracked during this turn.""" 67 key = _tracker_key(task_id, session_id) 68 with _lock: 69 return _recent_test_tracks.pop(key, set()) 70 71 72 def _attempt_track(path_str: str, task_id: str, session_id: str) -> None: 73 """Best-effort auto-track. Never raises.""" 74 try: 75 p = Path(path_str).expanduser() 76 except Exception: 77 return 78 if not p.exists(): 79 return 80 category = dg.guess_category(p) 81 if category is None: 82 return 83 newly = dg.track(str(p), category, silent=True) 84 if newly: 85 _record_track(task_id, session_id, p, category) 86 87 88 def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]: 89 path = args.get(_WRITE_FILE_PATH_KEY) 90 return {path} if isinstance(path, str) and path else set() 91 92 93 def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]: 94 # The patch tool creates new files via the `mode="patch"` path too, but 95 # most of its use is editing existing files — we only care about new 96 # ephemeral creations, so treat patch conservatively and only pick up 97 # the single-file `path` arg. Track-then-cleanup is idempotent, so 98 # re-tracking an already-tracked file is a no-op (dedup in track()). 99 path = args.get("path") 100 return {path} if isinstance(path, str) and path else set() 101 102 103 def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]: 104 """Best-effort: pull candidate filesystem paths from a terminal command 105 and its output, then let ``guess_category`` / ``is_safe_path`` filter. 106 """ 107 paths: Set[str] = set() 108 cmd = args.get("command") or "" 109 if isinstance(cmd, str) and cmd: 110 # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py` 111 try: 112 for tok in shlex.split(cmd, posix=True): 113 if tok.startswith(("/", "~")): 114 paths.add(tok) 115 except ValueError: 116 pass 117 # Only scan the result text if it's a reasonable size (avoid 50KB dumps). 118 if isinstance(result, str) and len(result) < 4096: 119 for match in _TERMINAL_PATH_REGEX.findall(result): 120 paths.add(match) 121 return paths 122 123 124 # --------------------------------------------------------------------------- 125 # Hooks 126 # --------------------------------------------------------------------------- 127 128 def _on_post_tool_call( 129 tool_name: str = "", 130 args: Optional[Dict[str, Any]] = None, 131 result: Any = None, 132 task_id: str = "", 133 session_id: str = "", 134 tool_call_id: str = "", 135 **_: Any, 136 ) -> None: 137 """Auto-track ephemeral files created by recent tool calls.""" 138 if not isinstance(args, dict): 139 return 140 141 candidates: Set[str] = set() 142 if tool_name == "write_file": 143 candidates = _extract_paths_from_write_file(args) 144 elif tool_name == "patch": 145 candidates = _extract_paths_from_patch(args) 146 elif tool_name == "terminal": 147 candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "") 148 else: 149 return 150 151 for path_str in candidates: 152 _attempt_track(path_str, task_id, session_id) 153 154 155 def _on_session_end( 156 session_id: str = "", 157 completed: bool = True, 158 interrupted: bool = False, 159 **_: Any, 160 ) -> None: 161 """Run quick cleanup if any test files were tracked during this turn.""" 162 # Drain both task-level and session-level buckets. In practice only one 163 # is populated per turn; the other is empty. 164 drained_session = _drain("", session_id) 165 # Also drain any task-scoped buckets that happen to exist. This is a 166 # cheap sweep: if an agent spawned subagents (each with their own 167 # task_id) they'll have recorded into separate buckets; we want to 168 # cleanup them all at session end. 169 with _lock: 170 task_buckets = list(_recent_test_tracks.keys()) 171 for key in task_buckets: 172 if key and key != session_id: 173 _recent_test_tracks.pop(key, None) 174 175 if not drained_session and not task_buckets: 176 return 177 178 try: 179 summary = dg.quick() 180 except Exception as exc: 181 logger.debug("disk-cleanup quick cleanup failed: %s", exc) 182 return 183 184 if summary["deleted"] or summary["empty_dirs"]: 185 dg._log( 186 f"AUTO_QUICK (session_end): deleted={summary['deleted']} " 187 f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}" 188 ) 189 190 191 # --------------------------------------------------------------------------- 192 # Slash command 193 # --------------------------------------------------------------------------- 194 195 _HELP_TEXT = """\ 196 /disk-cleanup — ephemeral-file cleanup 197 198 Subcommands: 199 status Per-category breakdown + top-10 largest 200 dry-run Preview what quick/deep would delete 201 quick Run safe cleanup now (no prompts) 202 deep Run quick, then list items that need prompts 203 track <path> <category> Manually add a path to tracking 204 forget <path> Stop tracking a path (does not delete) 205 206 Categories: temp | test | research | download | chrome-profile | cron-output | other 207 208 All operations are scoped to HERMES_HOME and /tmp/hermes-*. 209 Test files are auto-tracked on write_file / terminal and auto-cleaned at session end. 210 """ 211 212 213 def _fmt_summary(summary: Dict[str, Any]) -> str: 214 base = ( 215 f"[disk-cleanup] Cleaned {summary['deleted']} files + " 216 f"{summary['empty_dirs']} empty dirs, freed {dg.fmt_size(summary['freed'])}." 217 ) 218 if summary.get("errors"): 219 base += f"\n {len(summary['errors'])} error(s); see cleanup.log." 220 return base 221 222 223 def _handle_slash(raw_args: str) -> Optional[str]: 224 argv = raw_args.strip().split() 225 if not argv or argv[0] in ("help", "-h", "--help"): 226 return _HELP_TEXT 227 228 sub = argv[0] 229 230 if sub == "status": 231 return dg.format_status(dg.status()) 232 233 if sub == "dry-run": 234 auto, prompt = dg.dry_run() 235 auto_size = sum(i["size"] for i in auto) 236 prompt_size = sum(i["size"] for i in prompt) 237 lines = [ 238 "Dry-run preview (nothing deleted):", 239 f" Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})", 240 ] 241 for item in auto: 242 lines.append(f" [{item['category']}] {item['path']}") 243 lines.append( 244 f" Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})" 245 ) 246 for item in prompt: 247 lines.append(f" [{item['category']}] {item['path']}") 248 lines.append( 249 f"\n Total potential: {dg.fmt_size(auto_size + prompt_size)}" 250 ) 251 return "\n".join(lines) 252 253 if sub == "quick": 254 return _fmt_summary(dg.quick()) 255 256 if sub == "deep": 257 # In-session deep can't prompt the user interactively — show what 258 # quick cleaned plus the items that WOULD need confirmation. 259 quick_summary = dg.quick() 260 _auto, prompt_items = dg.dry_run() 261 lines = [_fmt_summary(quick_summary)] 262 if prompt_items: 263 size = sum(i["size"] for i in prompt_items) 264 lines.append( 265 f"\n{len(prompt_items)} item(s) need confirmation " 266 f"({dg.fmt_size(size)}):" 267 ) 268 for item in prompt_items: 269 lines.append(f" [{item['category']}] {item['path']}") 270 lines.append( 271 "\nRun `/disk-cleanup forget <path>` to skip, or delete " 272 "manually via terminal." 273 ) 274 return "\n".join(lines) 275 276 if sub == "track": 277 if len(argv) < 3: 278 return "Usage: /disk-cleanup track <path> <category>" 279 path_arg = argv[1] 280 category = argv[2] 281 if category not in dg.ALLOWED_CATEGORIES: 282 return ( 283 f"Unknown category '{category}'. " 284 f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}" 285 ) 286 if dg.track(path_arg, category, silent=True): 287 return f"Tracked {path_arg} as '{category}'." 288 return ( 289 f"Not tracked (already present, missing, or outside HERMES_HOME): " 290 f"{path_arg}" 291 ) 292 293 if sub == "forget": 294 if len(argv) < 2: 295 return "Usage: /disk-cleanup forget <path>" 296 n = dg.forget(argv[1]) 297 return ( 298 f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}." 299 if n else f"Not found in tracking: {argv[1]}" 300 ) 301 302 return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}" 303 304 305 # --------------------------------------------------------------------------- 306 # Plugin registration 307 # --------------------------------------------------------------------------- 308 309 def register(ctx) -> None: 310 ctx.register_hook("post_tool_call", _on_post_tool_call) 311 ctx.register_hook("on_session_end", _on_session_end) 312 ctx.register_command( 313 "disk-cleanup", 314 handler=_handle_slash, 315 description="Track and clean up ephemeral Hermes session files.", 316 )