test_tool_token_estimation.py
"""Tests for tool token estimation and curses_ui status_fn support."""

import re
from unittest.mock import patch

import pytest

# Only the parsing helper is meant to be imported from this module; keeping the
# test functions out of star-imports stops pytest collecting them a second time.
__all__ = ["parse_token_count"]

# tiktoken is not in core/[all] deps — skip estimation tests when unavailable
_has_tiktoken = True
try:
    import tiktoken  # noqa: F401
except ImportError:
    _has_tiktoken = False

_needs_tiktoken = pytest.mark.skipif(not _has_tiktoken, reason="tiktoken not installed")

# Matches "~350 tokens" / "~8.3k tokens". The optional "k" suffix gets its own
# group: the word "tokens" itself contains a "k", so searching the whole match
# for "k" would always succeed.
_TOKEN_COUNT_RE = re.compile(r"~([\d.]+)(k?)\s+tokens")


def parse_token_count(status: str) -> float:
    """Extract the numeric token count from a status string.

    Args:
        status: Text such as ``"Est. tool context: ~8.3k tokens"`` or
            ``"Est. tool context: ~350 tokens"``.

    Returns:
        The count as a float, scaled by 1000 when the number carries a ``k``
        suffix, or ``0.0`` when no token count is present in ``status``.
    """
    match = _TOKEN_COUNT_RE.search(status)
    if match is None:
        return 0.0
    value = float(match.group(1))
    if match.group(2):
        value *= 1000
    return value


# ─── Token Estimation Tests ──────────────────────────────────────────────────


@_needs_tiktoken
def test_estimate_tool_tokens_returns_positive_counts():
    """_estimate_tool_tokens should return a non-empty dict with positive values."""
    import hermes_cli.tools_config as tc

    # Clear cache to force fresh computation
    tc._tool_token_cache = None

    tokens = tc._estimate_tool_tokens()

    assert isinstance(tokens, dict)
    assert len(tokens) > 0
    for name, count in tokens.items():
        assert isinstance(name, str)
        assert isinstance(count, int)
        assert count > 0, f"Tool {name} has non-positive token count: {count}"


@_needs_tiktoken
def test_estimate_tool_tokens_is_cached():
    """Second call should return the same cached dict object."""
    import hermes_cli.tools_config as tc

    tc._tool_token_cache = None

    first = tc._estimate_tool_tokens()
    second = tc._estimate_tool_tokens()

    assert first is second


def test_estimate_tool_tokens_returns_empty_when_tiktoken_unavailable(monkeypatch):
    """Graceful degradation when tiktoken cannot be imported."""
    import builtins

    import hermes_cli.tools_config as tc

    # Use monkeypatch for the cache reset so the previous cache value is put
    # back at teardown even if the assertion below fails; a manual reset after
    # the assert would be skipped on failure and leak state into later tests.
    monkeypatch.setattr(tc, "_tool_token_cache", None)

    real_import = builtins.__import__

    def mock_import(name, *args, **kwargs):
        if name == "tiktoken":
            raise ImportError("mocked")
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(builtins, "__import__", mock_import)

    result = tc._estimate_tool_tokens()

    assert result == {}


@_needs_tiktoken
def test_estimate_tool_tokens_covers_known_tools():
    """Should include schemas for well-known tools like terminal, web_search."""
    import hermes_cli.tools_config as tc

    tc._tool_token_cache = None

    tokens = tc._estimate_tool_tokens()

    # These tools should always be discoverable
    for expected in ("terminal", "web_search", "read_file"):
        assert expected in tokens, f"Expected {expected!r} in token estimates"


# ─── Status Function Tests ───────────────────────────────────────────────────


def test_prompt_toolset_checklist_passes_status_fn(monkeypatch):
    """_prompt_toolset_checklist should pass a status_fn to curses_checklist."""
    import hermes_cli.tools_config as tc

    captured_kwargs = {}

    def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
        captured_kwargs["status_fn"] = status_fn
        captured_kwargs["title"] = title
        return selected  # Return pre-selected unchanged

    monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)

    tc._prompt_toolset_checklist("CLI", {"web", "terminal"})

    assert "status_fn" in captured_kwargs
    # If tiktoken is available, status_fn should be set
    tokens = tc._estimate_tool_tokens()
    if tokens:
        assert captured_kwargs["status_fn"] is not None


def test_status_fn_returns_formatted_token_count(monkeypatch):
    """The status_fn should return a human-readable token count string."""
    import hermes_cli.tools_config as tc
    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS

    captured = {}

    def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
        captured["status_fn"] = status_fn
        return selected

    monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)

    tc._prompt_toolset_checklist("CLI", {"web", "terminal"})

    status_fn = captured.get("status_fn")
    if status_fn is None:
        pytest.skip("tiktoken unavailable; status_fn not created")

    # Find the indices for web and terminal
    idx_map = {ts_key: i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)}

    # Call status_fn with web + terminal selected
    result = status_fn({idx_map["web"], idx_map["terminal"]})
    assert "tokens" in result
    assert "Est. tool context" in result


def test_status_fn_deduplicates_overlapping_tools(monkeypatch):
    """When toolsets overlap (browser includes web_search), tokens should not double-count."""
    import hermes_cli.tools_config as tc
    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS

    captured = {}

    def fake_checklist(title, items, selected, *, cancel_returns=None, status_fn=None):
        captured["status_fn"] = status_fn
        return selected

    monkeypatch.setattr("hermes_cli.curses_ui.curses_checklist", fake_checklist)

    tc._prompt_toolset_checklist("CLI", {"web"})

    status_fn = captured.get("status_fn")
    if status_fn is None:
        pytest.skip("tiktoken unavailable; status_fn not created")

    idx_map = {ts_key: i for i, (ts_key, _, _) in enumerate(CONFIGURABLE_TOOLSETS)}

    # web alone
    web_only = status_fn({idx_map["web"]})
    # browser includes web_search, so browser + web should not double-count web_search
    browser_only = status_fn({idx_map["browser"]})
    both = status_fn({idx_map["web"], idx_map["browser"]})

    # Extract numeric token counts from strings like "~8.3k tokens" or "~350 tokens"
    web_tok = parse_token_count(web_only)
    browser_tok = parse_token_count(browser_only)
    both_tok = parse_token_count(both)

    # Both together should be LESS than naive sum (due to web_search dedup)
    naive_sum = web_tok + browser_tok
    assert both_tok < naive_sum, (
        f"Expected deduplication: web({web_tok}) + browser({browser_tok}) = {naive_sum} "
        f"but combined = {both_tok}"
    )


def test_status_fn_empty_selection():
    """Status function with no tools selected should return ~0 tokens."""
    import hermes_cli.tools_config as tc

    tc._tool_token_cache = None
    tokens = tc._estimate_tool_tokens()
    if not tokens:
        pytest.skip("tiktoken unavailable")

    from hermes_cli.tools_config import CONFIGURABLE_TOOLSETS
    from toolsets import resolve_toolset

    ts_keys = [ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS]

    # NOTE(review): this is a local stand-in defined in the test, not the
    # status_fn built by _prompt_toolset_checklist — confirm that is intended.
    def status_fn(chosen: set) -> str:
        all_tools: set = set()
        for idx in chosen:
            all_tools.update(resolve_toolset(ts_keys[idx]))
        total = sum(tokens.get(name, 0) for name in all_tools)
        if total >= 1000:
            return f"Est. tool context: ~{total / 1000:.1f}k tokens"
        return f"Est. tool context: ~{total} tokens"

    result = status_fn(set())
    assert "~0 tokens" in result


# ─── Curses UI Status Bar Tests ──────────────────────────────────────────────


def test_curses_checklist_numbered_fallback_shows_status(monkeypatch, capsys):
    """The numbered fallback should print the status_fn output."""
    from hermes_cli.curses_ui import _numbered_fallback

    def my_status(chosen):
        return f"Selected {len(chosen)} items"

    # Simulate user pressing Enter immediately (empty input → confirm)
    monkeypatch.setattr("builtins.input", lambda _prompt="": "")

    result = _numbered_fallback(
        "Test title",
        ["Item A", "Item B", "Item C"],
        {0, 2},
        {0, 2},
        status_fn=my_status,
    )

    captured = capsys.readouterr()
    assert "Selected 2 items" in captured.out
    assert result == {0, 2}


def test_curses_checklist_numbered_fallback_without_status(monkeypatch, capsys):
    """The numbered fallback should work fine without status_fn."""
    from hermes_cli.curses_ui import _numbered_fallback

    monkeypatch.setattr("builtins.input", lambda _prompt="": "")

    result = _numbered_fallback(
        "Test title",
        ["Item A", "Item B"],
        {0},
        {0},
    )

    captured = capsys.readouterr()
    assert "Est. tool context" not in captured.out
    assert result == {0}


# ─── Registry get_schema Tests ───────────────────────────────────────────────


def test_registry_get_schema_returns_schema():
    """registry.get_schema() should return a tool's schema dict."""
    from tools.registry import registry

    # Import to trigger discovery
    import model_tools  # noqa: F401

    schema = registry.get_schema("terminal")
    assert schema is not None
    assert "name" in schema
    assert schema["name"] == "terminal"
    assert "parameters" in schema


def test_registry_get_schema_returns_none_for_unknown():
    """registry.get_schema() should return None for unknown tools."""
    from tools.registry import registry

    assert registry.get_schema("nonexistent_tool_xyz") is None