Cradicle Explorer

/ tests / hermes_cli / test_tool_token_estimation.py
test_tool_token_estimation.py
  1  """Tests for tool token estimation and curses_ui status_fn support."""
  2  
  3  from unittest.mock import patch
  4  
  5  import pytest
  6  
  7  # tiktoken is not in core/[all] deps — skip estimation tests when unavailable
  8  _has_tiktoken = True
  9  try:
 10      import tiktoken  # noqa: F401
 11  except ImportError:
 12      _has_tiktoken = False
 13  
 14  _needs_tiktoken = pytest.mark.skipif(not _has_tiktoken, reason="tiktoken not installed")
 15  
 16  
 17  # ─── Token Estimation Tests ──────────────────────────────────────────────────
 18  
 19  
 20  @_needs_tiktoken
 21  def test_estimate_tool_tokens_returns_positive_counts():
 22      """_estimate_tool_tokens should return a non-empty dict with positive values."""
 23      from hermes_cli.tools_config import _estimate_tool_tokens, _tool_token_cache
 24  
 25      # Clear cache to force fresh computation
 26      import hermes_cli.tools_config as tc
 27      tc._tool_token_cache = None
 28  
 29      tokens = _estimate_tool_tokens()
 30  
 31      assert isinstance(tokens, dict)
 32      assert len(tokens) > 0
 33      for name, count in tokens.items():
 34          assert isinstance(name, str)
 35          assert isinstance(count, int)
 36          assert count > 0, f"Tool {name} has non-positive token count: {count}"
 37  
 38  
 39  @_needs_tiktoken
 40  def test_estimate_tool_tokens_is_cached():
 41      """Second call should return the same cached dict object."""
 42      import hermes_cli.tools_config as tc
 43      tc._tool_token_cache = None
 44  
 45      first = tc._estimate_tool_tokens()
 46      second = tc._estimate_tool_tokens()
 47  
 48      assert first is second
 49  
 50  
 51  def test_estimate_tool_tokens_returns_empty_when_tiktoken_unavailable(monkeypatch):
 52      """Graceful degradation when tiktoken cannot be imported."""
 53      import hermes_cli.tools_config as tc
 54      tc._tool_token_cache = None
 55  
 56      import builtins
 57      real_import = builtins.__import__
 58  
 59      def mock_import(name, *args, **kwargs):
 60          if name == "tiktoken":
 61              raise ImportError("mocked")
 62          return real_import(name, *args, **kwargs)
 63  
 64      monkeypatch.setattr(builtins, "__import__", mock_import)
 65  
 66      result = tc._estimate_tool_tokens()
 67  
 68      assert result == {}
 69  
 70      # Reset cache for other tests
 71      tc._tool_token_cache = None
 72  
 73  
 74  @_needs_tiktoken
 75  def test_estimate_tool_tokens_covers_known_tools():
 76      """Should include schemas for well-known tools like terminal, web_search."""
 77      import hermes_cli.tools_config as tc
 78      tc._tool_token_cache = None
 79  
 80      tokens = tc._estimate_tool_tokens()
 81  
 82      # These tools should always be discoverable
 83      for expected in ("terminal", "web_search", "read_file"):
 84          assert expected in tokens, f"Expected {expected!r} in token estimates"
 85  
 86  
 87  # ─── Status Function Tests ───────────────────────────────────────────────────
 88  
 89  
 90  def test_prompt_toolset_checklist_passes_status_fn(monkeypatch):
 91      """_prompt_toolset_checklist should pass a status_fn to curses_checklist."""
 92      import hermes_cli.tools_config as tc
 93  
 94      captured_kwargs = {}
 95  
 96      def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
 97          captured_kwargs["status_fn"] = status_fn
 98          captured_kwargs["title"] = title
 99          return selected  # Return pre-selected unchanged
100  
101      monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)
102  
103      tc._prompt_toolset_checklist("CLI", {"web", "terminal"})
104  
105      assert "status_fn" in captured_kwargs
106      # If tiktoken is available, status_fn should be set
107      tokens = tc._estimate_tool_tokens()
108      if tokens:
109          assert captured_kwargs["status_fn"] is not None
110  
111  
def test_status_fn_returns_formatted_token_count(monkeypatch):
    """The status_fn handed to the checklist should yield a readable token summary."""
    import hermes_cli.tools_config as tools_config
    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS

    recorded = {}

    def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
        # Capture the callback and leave the selection untouched.
        recorded["status_fn"] = status_fn
        return selected

    monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)

    tools_config._prompt_toolset_checklist("CLI", {"web", "terminal"})

    status_fn = recorded.get("status_fn")
    if status_fn is None:
        pytest.skip("tiktoken unavailable; status_fn not created")

    # Map every toolset key to its position in CONFIGURABLE_TOOLSETS.
    positions = {}
    for position, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS):
        positions[ts_key] = position

    # Ask for the status line with web + terminal selected.
    chosen = {positions["web"], positions["terminal"]}
    message = status_fn(chosen)
    for fragment in ("tokens", "Est. tool context"):
        assert fragment in message
138  
139  
def test_status_fn_deduplicates_overlapping_tools(monkeypatch):
    """When toolsets overlap (browser includes web_search), tokens should not double-count.

    The status_fn captured from ``_prompt_toolset_checklist`` is called for
    ``web`` alone, ``browser`` alone, and both together; the combined estimate
    must be strictly below the sum of the two individual estimates.
    """
    import re

    import hermes_cli.tools_config as tc
    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS

    captured = {}

    def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
        captured["status_fn"] = status_fn
        return selected

    monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)

    tc._prompt_toolset_checklist("CLI", {"web"})

    status_fn = captured.get("status_fn")
    if status_fn is None:
        pytest.skip("tiktoken unavailable; status_fn not created")

    idx_map = {ts_key: i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)}

    # web alone
    web_only = status_fn({idx_map["web"]})
    # browser includes web_search, so browser + web should not double-count web_search
    browser_only = status_fn({idx_map["browser"]})
    both = status_fn({idx_map["web"], idx_map["browser"]})

    def parse_tokens(s):
        """Extract the numeric count from "~8.3k tokens" or "~350 tokens"."""
        # The optional "k" suffix gets its own group. Searching the matched
        # text for "k" would always succeed because "tokens" contains a "k",
        # wrongly scaling plain counts such as "~350 tokens" by 1000.
        m = re.search(r"~([\d.]+)(k?)\s+tokens", s)
        # Fail loudly on an unexpected format instead of comparing zeros.
        assert m, f"Could not parse a token count from {s!r}"
        val = float(m.group(1))
        if m.group(2):
            val *= 1000
        return val

    web_tok = parse_tokens(web_only)
    browser_tok = parse_tokens(browser_only)
    both_tok = parse_tokens(both)

    # Both together should be LESS than naive sum (due to web_search dedup)
    naive_sum = web_tok + browser_tok
    assert both_tok < naive_sum, (
        f"Expected deduplication: web({web_tok}) + browser({browser_tok}) = {naive_sum} "
        f"but combined = {both_tok}"
    )
189  
190  
def test_status_fn_empty_selection():
    """A status function given an empty selection should report ~0 tokens.

    The status function exercised here is a local stand-in defined inside this
    test; it is built from the real token estimates and ``resolve_toolset``.
    """
    import hermes_cli.tools_config as tools_config

    tools_config._tool_token_cache = None
    estimates = tools_config._estimate_tool_tokens()
    if not estimates:
        pytest.skip("tiktoken unavailable")

    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS
    from toolsets import resolve_toolset

    toolset_keys = []
    for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
        toolset_keys.append(ts_key)

    def status_fn(chosen: set) -> str:
        # Union the tools of every chosen toolset so shared tools count once.
        selected_tools: set = set().union(
            *(resolve_toolset(toolset_keys[position]) for position in chosen)
        )
        total = 0
        for tool_name in selected_tools:
            total += estimates.get(tool_name, 0)
        if total < 1000:
            return f"Est. tool context: ~{total} tokens"
        return f"Est. tool context: ~{total / 1000:.1f}k tokens"

    summary = status_fn(set())
    assert "~0 tokens" in summary
216  
217  
218  # ─── Curses UI Status Bar Tests ──────────────────────────────────────────────
219  
220  
def test_curses_checklist_numbered_fallback_shows_status(monkeypatch, capsys):
    """_numbered_fallback should print whatever the supplied status_fn returns."""
    from hermes_cli.curses_ui import _numbered_fallback

    def my_status(chosen):
        return f"Selected {len(chosen)} items"

    def press_enter(_prompt=""):
        # Stand-in for input(): the user submits an empty line straight away.
        return ""

    monkeypatch.setattr("builtins.input", press_enter)

    labels = ["Item A", "Item B", "Item C"]
    result = _numbered_fallback("Test title", labels, {0, 2}, {0, 2}, status_fn=my_status)

    printed = capsys.readouterr().out
    assert "Selected 2 items" in printed
    assert result == {0, 2}
242  
243  
def test_curses_checklist_numbered_fallback_without_status(monkeypatch, capsys):
    """_numbered_fallback must also work when no status_fn is supplied."""
    from hermes_cli.curses_ui import _numbered_fallback

    def press_enter(_prompt=""):
        # Stand-in for input(): the user submits an empty line straight away.
        return ""

    monkeypatch.setattr("builtins.input", press_enter)

    labels = ["Item A", "Item B"]
    result = _numbered_fallback("Test title", labels, {0}, {0})

    printed = capsys.readouterr().out
    # No status callback was given, so no token status line may appear.
    assert "Est. tool context" not in printed
    assert result == {0}
260  
261  
262  # ─── Registry get_schema Tests ───────────────────────────────────────────────
263  
264  
def test_registry_get_schema_returns_schema():
    """registry.get_schema() should hand back the schema dict of a known tool."""
    from tools.registry import registry

    # Imported only for its side effect of triggering discovery
    import model_tools  # noqa: F401

    terminal_schema = registry.get_schema("terminal")

    assert terminal_schema is not None
    assert "name" in terminal_schema and terminal_schema["name"] == "terminal"
    assert "parameters" in terminal_schema
277  
278  
def test_registry_get_schema_returns_none_for_unknown():
    """Looking up a tool name that is not registered must yield None."""
    from tools.registry import registry

    missing_schema = registry.get_schema("nonexistent_tool_xyz")

    assert missing_schema is None