test_background_review.py
1 """Regression tests for background review agent cleanup.""" 2 3 from __future__ import annotations 4 5 import run_agent as run_agent_module 6 from run_agent import AIAgent 7 8 9 def _bare_agent() -> AIAgent: 10 agent = object.__new__(AIAgent) 11 agent.model = "fake-model" 12 agent.platform = "telegram" 13 agent.provider = "openai" 14 agent.base_url = "" 15 agent.api_key = "" 16 agent.api_mode = "" 17 agent.session_id = "test-session" 18 agent._parent_session_id = "" 19 agent._credential_pool = None 20 agent._memory_store = object() 21 agent._memory_enabled = True 22 agent._user_profile_enabled = False 23 agent._MEMORY_REVIEW_PROMPT = "review memory" 24 agent._SKILL_REVIEW_PROMPT = "review skills" 25 agent._COMBINED_REVIEW_PROMPT = "review both" 26 agent.background_review_callback = None 27 agent.status_callback = None 28 agent._safe_print = lambda *_args, **_kwargs: None 29 return agent 30 31 32 class ImmediateThread: 33 def __init__(self, *, target, daemon=None, name=None): 34 self._target = target 35 36 def start(self): 37 self._target() 38 39 40 def test_background_review_shuts_down_memory_provider_before_close(monkeypatch): 41 events = [] 42 43 class FakeReviewAgent: 44 def __init__(self, **kwargs): 45 events.append(("init", kwargs)) 46 self._session_messages = [] 47 48 def run_conversation(self, **kwargs): 49 events.append(("run_conversation", kwargs)) 50 51 def shutdown_memory_provider(self): 52 events.append(("shutdown_memory_provider", None)) 53 54 def close(self): 55 events.append(("close", None)) 56 57 monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent) 58 monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread) 59 60 agent = _bare_agent() 61 62 AIAgent._spawn_background_review( 63 agent, 64 messages_snapshot=[{"role": "user", "content": "hello"}], 65 review_memory=True, 66 ) 67 68 assert [name for name, _payload in events] == [ 69 "init", 70 "run_conversation", 71 "shutdown_memory_provider", 72 "close", 73 ] 74 75 76 def test_background_review_installs_auto_deny_approval_callback(monkeypatch): 77 """Regression guard for #15216. 78 79 The background review thread must install a non-interactive approval 80 callback. If it doesn't, any dangerous-command guard the review agent 81 trips falls back to input() on a daemon thread, which deadlocks against 82 the parent's prompt_toolkit TUI. 83 """ 84 import tools.terminal_tool as tt 85 86 observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"} 87 88 class FakeReviewAgent: 89 def __init__(self, **kwargs): 90 self._session_messages = [] 91 92 def run_conversation(self, **kwargs): 93 # Capture what the callback looks like mid-run. It must be 94 # a callable (the auto-deny) -- not None. 95 observed["during_run"] = tt._get_approval_callback() 96 97 def shutdown_memory_provider(self): 98 pass 99 100 def close(self): 101 pass 102 103 monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent) 104 monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread) 105 106 # Start from a clean slot. 107 tt.set_approval_callback(None) 108 agent = _bare_agent() 109 110 AIAgent._spawn_background_review( 111 agent, 112 messages_snapshot=[{"role": "user", "content": "hello"}], 113 review_memory=True, 114 ) 115 116 observed["after_finally"] = tt._get_approval_callback() 117 118 assert callable(observed["during_run"]), ( 119 "Background review did not install an approval callback on its " 120 "worker thread; dangerous-command prompts will deadlock against " 121 "the parent TUI (#15216)." 122 ) 123 # The installed callback must deny (it's a safety gate, not a prompt). 124 assert observed["during_run"]("rm -rf /", "test") == "deny" 125 126 assert observed["after_finally"] is None, ( 127 "Background review leaked its approval callback into the worker " 128 "thread's TLS slot; a recycled thread-id could reuse it." 129 ) 130 131 132 def test_background_review_summary_is_attributed_to_self_improvement_loop(monkeypatch): 133 """The CLI/gateway emission must identify the self-improvement loop. 134 135 Users who miss the line in their terminal have no way to tell that the 136 background review was what modified their skill/memory stores. The 137 summary prefix ``💾 Self-improvement review: …`` makes the origin 138 explicit so both the CLI and gateway deliveries are unambiguous. 139 """ 140 import json 141 142 captured_prints: list = [] 143 captured_bg_callback: list = [] 144 145 class FakeReviewAgent: 146 def __init__(self, **kwargs): 147 # Simulate a review that successfully updated memory so 148 # _summarize_background_review_actions returns a real action. 149 self._session_messages = [ 150 { 151 "role": "tool", 152 "tool_call_id": "call_bg", 153 "content": json.dumps( 154 {"success": True, "message": "Entry added", "target": "memory"} 155 ), 156 } 157 ] 158 159 def run_conversation(self, **kwargs): 160 pass 161 162 def shutdown_memory_provider(self): 163 pass 164 165 def close(self): 166 pass 167 168 monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent) 169 monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread) 170 171 agent = _bare_agent() 172 agent._safe_print = lambda *a, **kw: captured_prints.append(" ".join(str(x) for x in a)) 173 agent.background_review_callback = lambda msg: captured_bg_callback.append(msg) 174 175 AIAgent._spawn_background_review( 176 agent, 177 messages_snapshot=[{"role": "user", "content": "hi"}], 178 review_memory=True, 179 ) 180 181 # Exactly one summary should have been emitted, and it must identify 182 # the self-improvement review explicitly. 183 assert len(captured_prints) == 1, captured_prints 184 printed = captured_prints[0] 185 assert "Self-improvement review" in printed, printed 186 assert "Memory updated" in printed, printed 187 188 # Gateway path gets the same prefix. 189 assert len(captured_bg_callback) == 1 190 assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), ( 191 captured_bg_callback[0] 192 )