/ plugins / disk-cleanup / __init__.py
__init__.py
  1  """disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files.
  2  
  3  Wires three behaviours:
  4  
1. ``post_tool_call`` hook — inspects ``write_file``, ``patch`` and
   ``terminal`` tool calls (their arguments, plus short terminal output)
   for newly-created paths matching test/temp patterns under
   ``HERMES_HOME`` and tracks them silently.  Zero agent compliance
   required.
  9  
 10  2. ``on_session_end`` hook — when any test files were auto-tracked
 11     during the just-finished turn, runs :func:`disk_cleanup.quick` and
 12     logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``.
 13  
 14  3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``,
 15     ``quick``, ``deep``, ``track``, ``forget``.
 16  
 17  Replaces PR #12212's skill-plus-script design: the agent no longer
 18  needs to remember to run commands.
 19  """
 20  
 21  from __future__ import annotations
 22  
 23  import logging
 24  import re
 25  import shlex
 26  import threading
 27  from pathlib import Path
 28  from typing import Any, Dict, Optional, Set
 29  
 30  from . import disk_cleanup as dg
 31  
 32  logger = logging.getLogger(__name__)
 33  
 34  
# Per-task set of "test files newly tracked this turn".  Keyed by task_id
# (or session_id as fallback, then the literal "default" when both are
# empty) so on_session_end can decide whether to run cleanup.  Guarded by
# a lock — post_tool_call can fire concurrently on parallel tool calls.
_recent_test_tracks: Dict[str, Set[str]] = {}
# Non-reentrant: code that already holds it must not call a helper that
# acquires it again.
_lock = threading.Lock()


# Tool-call result shapes we can parse
# Key in the write_file argument dict that holds the target path.
_WRITE_FILE_PATH_KEY = "path"
# Captures absolute (``/...``) and home-relative (``~/...``) path-like tokens
# that sit at the very start of the text or right after whitespace.  A match
# stops at the next whitespace, single/double quote, or backtick.
_TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)")
 46  
 47  
 48  # ---------------------------------------------------------------------------
 49  # Helpers
 50  # ---------------------------------------------------------------------------
 51  
 52  def _tracker_key(task_id: str, session_id: str) -> str:
 53      return task_id or session_id or "default"
 54  
 55  
 56  def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None:
 57      """Record that we tracked *path* as *category* during this turn."""
 58      if category != "test":
 59          return
 60      key = _tracker_key(task_id, session_id)
 61      with _lock:
 62          _recent_test_tracks.setdefault(key, set()).add(str(path))
 63  
 64  
 65  def _drain(task_id: str, session_id: str) -> Set[str]:
 66      """Pop the set of test paths tracked during this turn."""
 67      key = _tracker_key(task_id, session_id)
 68      with _lock:
 69          return _recent_test_tracks.pop(key, set())
 70  
 71  
 72  def _attempt_track(path_str: str, task_id: str, session_id: str) -> None:
 73      """Best-effort auto-track. Never raises."""
 74      try:
 75          p = Path(path_str).expanduser()
 76      except Exception:
 77          return
 78      if not p.exists():
 79          return
 80      category = dg.guess_category(p)
 81      if category is None:
 82          return
 83      newly = dg.track(str(p), category, silent=True)
 84      if newly:
 85          _record_track(task_id, session_id, p, category)
 86  
 87  
 88  def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]:
 89      path = args.get(_WRITE_FILE_PATH_KEY)
 90      return {path} if isinstance(path, str) and path else set()
 91  
 92  
 93  def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]:
 94      # The patch tool creates new files via the `mode="patch"` path too, but
 95      # most of its use is editing existing files — we only care about new
 96      # ephemeral creations, so treat patch conservatively and only pick up
 97      # the single-file `path` arg.  Track-then-cleanup is idempotent, so
 98      # re-tracking an already-tracked file is a no-op (dedup in track()).
 99      path = args.get("path")
100      return {path} if isinstance(path, str) and path else set()
101  
102  
103  def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]:
104      """Best-effort: pull candidate filesystem paths from a terminal command
105      and its output, then let ``guess_category`` / ``is_safe_path`` filter.
106      """
107      paths: Set[str] = set()
108      cmd = args.get("command") or ""
109      if isinstance(cmd, str) and cmd:
110          # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py`
111          try:
112              for tok in shlex.split(cmd, posix=True):
113                  if tok.startswith(("/", "~")):
114                      paths.add(tok)
115          except ValueError:
116              pass
117      # Only scan the result text if it's a reasonable size (avoid 50KB dumps).
118      if isinstance(result, str) and len(result) < 4096:
119          for match in _TERMINAL_PATH_REGEX.findall(result):
120              paths.add(match)
121      return paths
122  
123  
124  # ---------------------------------------------------------------------------
125  # Hooks
126  # ---------------------------------------------------------------------------
127  
128  def _on_post_tool_call(
129      tool_name: str = "",
130      args: Optional[Dict[str, Any]] = None,
131      result: Any = None,
132      task_id: str = "",
133      session_id: str = "",
134      tool_call_id: str = "",
135      **_: Any,
136  ) -> None:
137      """Auto-track ephemeral files created by recent tool calls."""
138      if not isinstance(args, dict):
139          return
140  
141      candidates: Set[str] = set()
142      if tool_name == "write_file":
143          candidates = _extract_paths_from_write_file(args)
144      elif tool_name == "patch":
145          candidates = _extract_paths_from_patch(args)
146      elif tool_name == "terminal":
147          candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "")
148      else:
149          return
150  
151      for path_str in candidates:
152          _attempt_track(path_str, task_id, session_id)
153  
154  
def _on_session_end(
    session_id: str = "",
    completed: bool = True,
    interrupted: bool = False,
    **_: Any,
) -> None:
    """``on_session_end`` hook: run quick cleanup if anything was tracked.

    Drains the bucket for *session_id* and drops every other recorded
    bucket (e.g. those written under a subagent's ``task_id``).  If any
    bucket existed, ``dg.quick()`` is run and, when it deleted files or
    empty directories, one summary line is written through ``dg._log``.

    *completed*, *interrupted* and extra keyword arguments are accepted
    but unused.  The hook never raises: failures in cleanup or logging are
    reported at debug level only.
    """
    session_key = _tracker_key("", session_id)
    # Drain and sweep in ONE critical section.  ``_lock`` is a plain
    # (non-reentrant) Lock, so the session bucket is popped inline here
    # rather than via ``_drain``, and every mutation of the shared dict
    # stays under the lock as the module-level invariant requires.
    with _lock:
        drained_session = _recent_test_tracks.pop(session_key, set())
        task_buckets = [
            key for key in _recent_test_tracks if key and key != session_id
        ]
        for key in task_buckets:
            del _recent_test_tracks[key]

    if not drained_session and not task_buckets:
        return

    try:
        summary = dg.quick()
    except Exception as exc:
        logger.debug("disk-cleanup quick cleanup failed: %s", exc)
        return

    try:
        if summary["deleted"] or summary["empty_dirs"]:
            dg._log(
                f"AUTO_QUICK (session_end): deleted={summary['deleted']} "
                f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}"
            )
    except Exception as exc:
        # Cleanup already happened; a logging problem must not surface
        # from a session-end hook.
        logger.debug("disk-cleanup session-end logging failed: %s", exc)
188          )
189  
190  
191  # ---------------------------------------------------------------------------
192  # Slash command
193  # ---------------------------------------------------------------------------
194  
# Reply text for `/disk-cleanup` with no subcommand or `help` / `-h` /
# `--help`; _handle_slash also appends it to the "Unknown subcommand"
# message.  Keep the subcommand list in sync with _handle_slash.
_HELP_TEXT = """\
/disk-cleanup — ephemeral-file cleanup

Subcommands:
  status                     Per-category breakdown + top-10 largest
  dry-run                    Preview what quick/deep would delete
  quick                      Run safe cleanup now (no prompts)
  deep                       Run quick, then list items that need prompts
  track <path> <category>    Manually add a path to tracking
  forget <path>              Stop tracking a path (does not delete)

Categories: temp | test | research | download | chrome-profile | cron-output | other

All operations are scoped to HERMES_HOME and /tmp/hermes-*.
Test files are auto-tracked on write_file / terminal and auto-cleaned at session end.
"""
211  
212  
def _fmt_summary(summary: Dict[str, Any]) -> str:
    """Render a cleanup summary dict as a user-facing message.

    Reads ``deleted``, ``empty_dirs`` and ``freed`` from *summary* (the
    latter formatted with ``dg.fmt_size``).  When ``errors`` is present
    and non-empty, a second line reports how many there were.
    """
    deleted = summary["deleted"]
    empty_dirs = summary["empty_dirs"]
    freed = dg.fmt_size(summary["freed"])
    lines = [
        f"[disk-cleanup] Cleaned {deleted} files + "
        f"{empty_dirs} empty dirs, freed {freed}."
    ]
    errors = summary.get("errors")
    if errors:
        lines.append(f"  {len(errors)} error(s); see cleanup.log.")
    return "\n".join(lines)
221  
222  
223  def _handle_slash(raw_args: str) -> Optional[str]:
224      argv = raw_args.strip().split()
225      if not argv or argv[0] in ("help", "-h", "--help"):
226          return _HELP_TEXT
227  
228      sub = argv[0]
229  
230      if sub == "status":
231          return dg.format_status(dg.status())
232  
233      if sub == "dry-run":
234          auto, prompt = dg.dry_run()
235          auto_size = sum(i["size"] for i in auto)
236          prompt_size = sum(i["size"] for i in prompt)
237          lines = [
238              "Dry-run preview (nothing deleted):",
239              f"  Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})",
240          ]
241          for item in auto:
242              lines.append(f"    [{item['category']}] {item['path']}")
243          lines.append(
244              f"  Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})"
245          )
246          for item in prompt:
247              lines.append(f"    [{item['category']}] {item['path']}")
248          lines.append(
249              f"\n  Total potential: {dg.fmt_size(auto_size + prompt_size)}"
250          )
251          return "\n".join(lines)
252  
253      if sub == "quick":
254          return _fmt_summary(dg.quick())
255  
256      if sub == "deep":
257          # In-session deep can't prompt the user interactively — show what
258          # quick cleaned plus the items that WOULD need confirmation.
259          quick_summary = dg.quick()
260          _auto, prompt_items = dg.dry_run()
261          lines = [_fmt_summary(quick_summary)]
262          if prompt_items:
263              size = sum(i["size"] for i in prompt_items)
264              lines.append(
265                  f"\n{len(prompt_items)} item(s) need confirmation "
266                  f"({dg.fmt_size(size)}):"
267              )
268              for item in prompt_items:
269                  lines.append(f"  [{item['category']}] {item['path']}")
270              lines.append(
271                  "\nRun `/disk-cleanup forget <path>` to skip, or delete "
272                  "manually via terminal."
273              )
274          return "\n".join(lines)
275  
276      if sub == "track":
277          if len(argv) < 3:
278              return "Usage: /disk-cleanup track <path> <category>"
279          path_arg = argv[1]
280          category = argv[2]
281          if category not in dg.ALLOWED_CATEGORIES:
282              return (
283                  f"Unknown category '{category}'. "
284                  f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}"
285              )
286          if dg.track(path_arg, category, silent=True):
287              return f"Tracked {path_arg} as '{category}'."
288          return (
289              f"Not tracked (already present, missing, or outside HERMES_HOME): "
290              f"{path_arg}"
291          )
292  
293      if sub == "forget":
294          if len(argv) < 2:
295              return "Usage: /disk-cleanup forget <path>"
296          n = dg.forget(argv[1])
297          return (
298              f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}."
299              if n else f"Not found in tracking: {argv[1]}"
300          )
301  
302      return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}"
303  
304  
305  # ---------------------------------------------------------------------------
306  # Plugin registration
307  # ---------------------------------------------------------------------------
308  
def register(ctx) -> None:
    """Plugin entry point: wire the hooks and the slash command into *ctx*.

    Registers ``_on_post_tool_call`` for ``post_tool_call``,
    ``_on_session_end`` for ``on_session_end``, and ``_handle_slash`` as
    the ``disk-cleanup`` command.
    """
    hooks = (
        ("post_tool_call", _on_post_tool_call),
        ("on_session_end", _on_session_end),
    )
    for event, callback in hooks:
        ctx.register_hook(event, callback)

    ctx.register_command(
        "disk-cleanup",
        handler=_handle_slash,
        description="Track and clean up ephemeral Hermes session files.",
    )
316      )