Cradicle Explorer

/ tests / run_agent / test_background_review.py
test_background_review.py
  1  """Regression tests for background review agent cleanup."""
  2  
  3  from __future__ import annotations
  4  
  5  import run_agent as run_agent_module
  6  from run_agent import AIAgent
  7  
  8  
  9  def _bare_agent() -> AIAgent:
 10      agent = object.__new__(AIAgent)
 11      agent.model = "fake-model"
 12      agent.platform = "telegram"
 13      agent.provider = "openai"
 14      agent.base_url = ""
 15      agent.api_key = ""
 16      agent.api_mode = ""
 17      agent.session_id = "test-session"
 18      agent._parent_session_id = ""
 19      agent._credential_pool = None
 20      agent._memory_store = object()
 21      agent._memory_enabled = True
 22      agent._user_profile_enabled = False
 23      agent._MEMORY_REVIEW_PROMPT = "review memory"
 24      agent._SKILL_REVIEW_PROMPT = "review skills"
 25      agent._COMBINED_REVIEW_PROMPT = "review both"
 26      agent.background_review_callback = None
 27      agent.status_callback = None
 28      agent._safe_print = lambda *_args, **_kwargs: None
 29      return agent
 30  
 31  
 32  class ImmediateThread:
 33      def __init__(self, *, target, daemon=None, name=None):
 34          self._target = target
 35  
 36      def start(self):
 37          self._target()
 38  
 39  
 40  def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
 41      events = []
 42  
 43      class FakeReviewAgent:
 44          def __init__(self, **kwargs):
 45              events.append(("init", kwargs))
 46              self._session_messages = []
 47  
 48          def run_conversation(self, **kwargs):
 49              events.append(("run_conversation", kwargs))
 50  
 51          def shutdown_memory_provider(self):
 52              events.append(("shutdown_memory_provider", None))
 53  
 54          def close(self):
 55              events.append(("close", None))
 56  
 57      monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
 58      monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
 59  
 60      agent = _bare_agent()
 61  
 62      AIAgent._spawn_background_review(
 63          agent,
 64          messages_snapshot=[{"role": "user", "content": "hello"}],
 65          review_memory=True,
 66      )
 67  
 68      assert [name for name, _payload in events] == [
 69          "init",
 70          "run_conversation",
 71          "shutdown_memory_provider",
 72          "close",
 73      ]
 74  
 75  
 76  def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
 77      """Regression guard for #15216.
 78  
 79      The background review thread must install a non-interactive approval
 80      callback. If it doesn't, any dangerous-command guard the review agent
 81      trips falls back to input() on a daemon thread, which deadlocks against
 82      the parent's prompt_toolkit TUI.
 83      """
 84      import tools.terminal_tool as tt
 85  
 86      observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"}
 87  
 88      class FakeReviewAgent:
 89          def __init__(self, **kwargs):
 90              self._session_messages = []
 91  
 92          def run_conversation(self, **kwargs):
 93              # Capture what the callback looks like mid-run. It must be
 94              # a callable (the auto-deny) -- not None.
 95              observed["during_run"] = tt._get_approval_callback()
 96  
 97          def shutdown_memory_provider(self):
 98              pass
 99  
100          def close(self):
101              pass
102  
103      monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
104      monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
105  
106      # Start from a clean slot.
107      tt.set_approval_callback(None)
108      agent = _bare_agent()
109  
110      AIAgent._spawn_background_review(
111          agent,
112          messages_snapshot=[{"role": "user", "content": "hello"}],
113          review_memory=True,
114      )
115  
116      observed["after_finally"] = tt._get_approval_callback()
117  
118      assert callable(observed["during_run"]), (
119          "Background review did not install an approval callback on its "
120          "worker thread; dangerous-command prompts will deadlock against "
121          "the parent TUI (#15216)."
122      )
123      # The installed callback must deny (it's a safety gate, not a prompt).
124      assert observed["during_run"]("rm -rf /", "test") == "deny"
125  
126      assert observed["after_finally"] is None, (
127          "Background review leaked its approval callback into the worker "
128          "thread's TLS slot; a recycled thread-id could reuse it."
129      )
130  
131  
132  def test_background_review_summary_is_attributed_to_self_improvement_loop(monkeypatch):
133      """The CLI/gateway emission must identify the self-improvement loop.
134  
135      Users who miss the line in their terminal have no way to tell that the
136      background review was what modified their skill/memory stores. The
137      summary prefix ``💾 Self-improvement review: …`` makes the origin
138      explicit so both the CLI and gateway deliveries are unambiguous.
139      """
140      import json
141  
142      captured_prints: list = []
143      captured_bg_callback: list = []
144  
145      class FakeReviewAgent:
146          def __init__(self, **kwargs):
147              # Simulate a review that successfully updated memory so
148              # _summarize_background_review_actions returns a real action.
149              self._session_messages = [
150                  {
151                      "role": "tool",
152                      "tool_call_id": "call_bg",
153                      "content": json.dumps(
154                          {"success": True, "message": "Entry added", "target": "memory"}
155                      ),
156                  }
157              ]
158  
159          def run_conversation(self, **kwargs):
160              pass
161  
162          def shutdown_memory_provider(self):
163              pass
164  
165          def close(self):
166              pass
167  
168      monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
169      monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
170  
171      agent = _bare_agent()
172      agent._safe_print = lambda *a, **kw: captured_prints.append(" ".join(str(x) for x in a))
173      agent.background_review_callback = lambda msg: captured_bg_callback.append(msg)
174  
175      AIAgent._spawn_background_review(
176          agent,
177          messages_snapshot=[{"role": "user", "content": "hi"}],
178          review_memory=True,
179      )
180  
181      # Exactly one summary should have been emitted, and it must identify
182      # the self-improvement review explicitly.
183      assert len(captured_prints) == 1, captured_prints
184      printed = captured_prints[0]
185      assert "Self-improvement review" in printed, printed
186      assert "Memory updated" in printed, printed
187  
188      # Gateway path gets the same prefix.
189      assert len(captured_bg_callback) == 1
190      assert captured_bg_callback[0].startswith("💾 Self-improvement review:"), (
191          captured_bg_callback[0]
192      )