Cradicle Explorer

/ tests / agent / test_subagent_progress.py
test_subagent_progress.py
  1  """
  2  Tests for subagent progress relay (issue #169).
  3  
  4  Verifies that:
  5  - KawaiiSpinner.print_above() works with and without active spinner
  6  - _build_child_progress_callback handles CLI/gateway/no-display paths
  7  - Thinking events are relayed correctly
  8  - Parallel callbacks don't share state
  9  """
 10  
 11  import io
 12  import sys
 13  import time
 14  import threading
 15  import pytest
 16  from unittest.mock import MagicMock, patch
 17  
 18  from agent.display import KawaiiSpinner
 19  from tools.delegate_tool import _build_child_progress_callback
 20  
 21  
 22  # =========================================================================
 23  # KawaiiSpinner.print_above tests
 24  # =========================================================================
 25  
 26  class TestPrintAbove:
 27      """Tests for KawaiiSpinner.print_above method."""
 28  
 29      def test_print_above_without_spinner_running(self):
 30          """print_above should write to stdout even when spinner is not running."""
 31          buf = io.StringIO()
 32          spinner = KawaiiSpinner("test")
 33          spinner._out = buf  # Redirect to buffer
 34          
 35          spinner.print_above("hello world")
 36          output = buf.getvalue()
 37          assert "hello world" in output
 38  
 39      def test_print_above_with_spinner_running(self):
 40          """print_above should clear spinner line and print text."""
 41          buf = io.StringIO()
 42          spinner = KawaiiSpinner("test")
 43          spinner._out = buf
 44          spinner.running = True  # Pretend spinner is running (don't start thread)
 45          
 46          spinner.print_above("tool line")
 47          output = buf.getvalue()
 48          assert "tool line" in output
 49          assert "\r" in output  # Should start with carriage return to clear spinner line
 50  
 51      def test_print_above_uses_captured_stdout(self):
 52          """print_above should use self._out, not sys.stdout.
 53          This ensures it works inside redirect_stdout(devnull)."""
 54          buf = io.StringIO()
 55          spinner = KawaiiSpinner("test")
 56          spinner._out = buf
 57          
 58          # Simulate redirect_stdout(devnull)
 59          old_stdout = sys.stdout
 60          sys.stdout = io.StringIO()
 61          try:
 62              spinner.print_above("should go to buf")
 63          finally:
 64              sys.stdout = old_stdout
 65          
 66          assert "should go to buf" in buf.getvalue()
 67  
 68  
 69  # =========================================================================
 70  # _build_child_progress_callback tests
 71  # =========================================================================
 72  
 73  class TestBuildChildProgressCallback:
 74      """Tests for child progress callback builder."""
 75  
 76      def test_returns_none_when_no_display(self):
 77          """Should return None when parent has no spinner or callback."""
 78          parent = MagicMock()
 79          parent._delegate_spinner = None
 80          parent.tool_progress_callback = None
 81          
 82          cb = _build_child_progress_callback(0, "test goal", parent)
 83          assert cb is None
 84  
 85      def test_cli_spinner_tool_event(self):
 86          """Should print tool line above spinner for CLI path."""
 87          buf = io.StringIO()
 88          spinner = KawaiiSpinner("delegating")
 89          spinner._out = buf
 90          spinner.running = True
 91          
 92          parent = MagicMock()
 93          parent._delegate_spinner = spinner
 94          parent.tool_progress_callback = None
 95          
 96          cb = _build_child_progress_callback(0, "test goal", parent)
 97          assert cb is not None
 98          
 99          cb("tool.started", "web_search", "quantum computing", {})
100          output = buf.getvalue()
101          assert "web_search" in output
102          assert "quantum computing" in output
103          assert "├─" in output
104  
105      def test_cli_spinner_thinking_event(self):
106          """Should print thinking line above spinner for CLI path."""
107          buf = io.StringIO()
108          spinner = KawaiiSpinner("delegating")
109          spinner._out = buf
110          spinner.running = True
111          
112          parent = MagicMock()
113          parent._delegate_spinner = spinner
114          parent.tool_progress_callback = None
115          
116          cb = _build_child_progress_callback(0, "test goal", parent)
117          cb("_thinking", "I'll search for papers first")
118          
119          output = buf.getvalue()
120          assert "💭" in output
121          assert "search for papers" in output
122  
123      def test_gateway_batched_progress(self):
124          """Gateway path: each tool.started relays a subagent.tool event, and a
125          subagent.progress summary fires once BATCH_SIZE tools accumulate."""
126          parent = MagicMock()
127          parent._delegate_spinner = None
128          parent_cb = MagicMock()
129          parent.tool_progress_callback = parent_cb
130  
131          cb = _build_child_progress_callback(0, "test goal", parent)
132  
133          # Each tool.started relays a subagent.tool event immediately (per-tool relay).
134          for i in range(4):
135              cb("tool.started", f"tool_{i}", f"arg_{i}", {})
136          # 4 per-tool relays so far, no batch summary yet (BATCH_SIZE=5)
137          events = [c.args[0] for c in parent_cb.call_args_list]
138          assert events == ["subagent.tool"] * 4
139  
140          # 5th call triggers another per-tool relay PLUS the batch-size summary
141          cb("tool.started", "tool_4", "arg_4", {})
142          events = [c.args[0] for c in parent_cb.call_args_list]
143          assert events == ["subagent.tool"] * 5 + ["subagent.progress"]
144          summary_call = parent_cb.call_args_list[-1]
145          summary_text = summary_call.kwargs.get("preview") or summary_call.args[2]
146          assert "tool_0" in summary_text
147          assert "tool_4" in summary_text
148  
149      def test_thinking_relayed_to_gateway(self):
150          """Thinking events are relayed as subagent.thinking events."""
151          parent = MagicMock()
152          parent._delegate_spinner = None
153          parent_cb = MagicMock()
154          parent.tool_progress_callback = parent_cb
155  
156          cb = _build_child_progress_callback(0, "test goal", parent)
157          cb("_thinking", "some reasoning text")
158  
159          parent_cb.assert_called_once()
160          assert parent_cb.call_args.args[0] == "subagent.thinking"
161          assert parent_cb.call_args.args[2] == "some reasoning text"
162  
163      def test_parallel_callbacks_independent(self):
164          """Each child's callback batches tool names independently."""
165          parent = MagicMock()
166          parent._delegate_spinner = None
167          parent_cb = MagicMock()
168          parent.tool_progress_callback = parent_cb
169  
170          cb0 = _build_child_progress_callback(0, "goal a", parent)
171          cb1 = _build_child_progress_callback(1, "goal b", parent)
172  
173          # 3 tool.started per child = 6 per-tool relays; neither should hit
174          # the batch-size summary (batch size = 5, counted per-child).
175          for i in range(3):
176              cb0("tool.started", f"tool_{i}", f"a_{i}", {})
177              cb1("tool.started", f"other_{i}", f"b_{i}", {})
178  
179          events = [c.args[0] for c in parent_cb.call_args_list]
180          assert events.count("subagent.tool") == 6
181          assert "subagent.progress" not in events
182  
183      def test_task_index_prefix_in_batch_mode(self):
184          """Batch mode (task_count > 1) should show 1-indexed prefix for all tasks."""
185          buf = io.StringIO()
186          spinner = KawaiiSpinner("delegating")
187          spinner._out = buf
188          spinner.running = True
189          
190          parent = MagicMock()
191          parent._delegate_spinner = spinner
192          parent.tool_progress_callback = None
193          
194          # task_index=0 in a batch of 3 → prefix "[1]"
195          cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
196          cb0("tool.started", "web_search", "test", {})
197          output = buf.getvalue()
198          assert "[1]" in output
199  
200          # task_index=2 in a batch of 3 → prefix "[3]"
201          buf.truncate(0)
202          buf.seek(0)
203          cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
204          cb2("tool.started", "web_search", "test", {})
205          output = buf.getvalue()
206          assert "[3]" in output
207  
208      def test_single_task_no_prefix(self):
209          """Single task (task_count=1) should not show index prefix."""
210          buf = io.StringIO()
211          spinner = KawaiiSpinner("delegating")
212          spinner._out = buf
213          spinner.running = True
214          
215          parent = MagicMock()
216          parent._delegate_spinner = spinner
217          parent.tool_progress_callback = None
218          
219          cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
220          cb("tool.started", "web_search", "test", {})
221          
222          output = buf.getvalue()
223          assert "[" not in output
224  
225  
226  # =========================================================================
227  # Integration: thinking callback in run_agent.py
228  # =========================================================================
229  
class TestThinkingCallback:
    """Check the ``_thinking`` relay logic used in the AIAgent conversation loop.

    The logic is re-implemented locally in ``_simulate_thinking_callback``
    rather than imported, so these tests pin the expected behaviour of that
    snippet, not the production module itself.
    """

    @staticmethod
    def _recorder():
        """Return ``(calls, callback)``; the callback appends ``(name, preview)``."""
        calls = []
        return calls, (lambda name, preview=None: calls.append((name, preview)))

    def _simulate_thinking_callback(self, content, callback, delegate_depth=1):
        """Replay the thinking-relay snippet described as living in run_agent.py.

        content: assistant message text; may be None or empty.
        callback: callable invoked as ``callback("_thinking", first_line)``.
        delegate_depth: stands in for ``self._delegate_depth``.
            0 = main agent (must NOT fire), >=1 = subagent (fires).

        Exceptions raised by the callback are swallowed on purpose, matching
        the best-effort nature of the snippet being simulated.
        """
        import re

        # Nothing to relay without text, a listener, and a subagent depth.
        if not content or not callback or not delegate_depth > 0:
            return

        without_tags = re.sub(
            r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', content.strip()
        ).strip()
        if not without_tags:
            return

        # Only the first line is shown, capped at 80 characters.
        headline = without_tags.split('\n')[0][:80]
        if not headline:
            return

        try:
            callback("_thinking", headline)
        except Exception:
            pass

    def test_thinking_callback_fires_on_content(self):
        """A plain assistant message produces exactly one _thinking event."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "I'll research quantum computing first, then summarize.", record
        )

        assert len(calls) == 1
        event, preview = calls[0]
        assert event == "_thinking"
        assert "quantum computing" in preview

    def test_thinking_callback_skipped_when_no_content(self):
        """``None`` content produces no event."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(None, record)

        assert len(calls) == 0

    def test_thinking_callback_truncates_long_content(self):
        """Only the first line is used and it is cut to 80 characters."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "A" * 200 + "\nSecond line should be ignored", record
        )

        assert len(calls) == 1
        assert len(calls[0][1]) == 80

    def test_thinking_callback_skipped_for_main_agent(self):
        """delegate_depth=0 (main agent) must stay silent.

        This is what keeps gateways such as Telegram/Discord from being spammed.
        """
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "I'll help you with that request.", record, delegate_depth=0
        )

        assert len(calls) == 0

    def test_thinking_callback_strips_reasoning_scratchpad(self):
        """REASONING_SCRATCHPAD tags are removed before the text is relayed."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD>I need to analyze this carefully</REASONING_SCRATCHPAD>",
            record,
        )

        assert len(calls) == 1
        preview = calls[0][1]
        assert "<REASONING_SCRATCHPAD>" not in preview
        assert "analyze this carefully" in preview

    def test_thinking_callback_strips_think_tags(self):
        """<think> tags are removed before the text is relayed."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "<think>Let me think about this problem</think>", record
        )

        assert len(calls) == 1
        preview = calls[0][1]
        assert "<think>" not in preview
        assert "think about this problem" in preview

    def test_thinking_callback_empty_after_strip(self):
        """Content made up solely of tags produces no event."""
        calls, record = self._recorder()

        self._simulate_thinking_callback(
            "<REASONING_SCRATCHPAD></REASONING_SCRATCHPAD>", record
        )

        assert len(calls) == 0
324  
325  
326  # =========================================================================
327  # Gateway batch flush tests
328  # =========================================================================
329  
class TestBatchFlush:
    """Check the ``_flush`` hook attached to a child progress callback."""

    @staticmethod
    def _gateway_parent():
        """Return ``(parent, relay)`` for a parent with a progress callback only."""
        agent = MagicMock()
        agent._delegate_spinner = None
        relay = MagicMock()
        agent.tool_progress_callback = relay
        return agent, relay

    @staticmethod
    def _event_names(relay):
        """List the first positional argument of every call made to ``relay``."""
        return [recorded.args[0] for recorded in relay.call_args_list]

    def test_flush_sends_remaining_batch(self):
        """A partial batch is emitted as one subagent.progress summary on flush."""
        agent, relay = self._gateway_parent()
        child_cb = _build_child_progress_callback(0, "test goal", agent)

        # Three tools stay below the five-tool threshold the test assumes, so
        # only per-tool relays are expected at this point.
        pending = (
            ("web_search", "query1"),
            ("read_file", "file.txt"),
            ("write_file", "out.txt"),
        )
        for tool_name, preview in pending:
            child_cb("tool.started", tool_name, preview, {})

        names = self._event_names(relay)
        assert names == ["subagent.tool"] * 3
        assert "subagent.progress" not in names

        # Flushing must push out the tools that were still waiting.
        child_cb._flush()

        assert self._event_names(relay)[-1] == "subagent.progress"
        final_call = relay.call_args_list[-1]
        summary = final_call.kwargs.get("preview") or final_call.args[2]
        assert "web_search" in summary
        assert "write_file" in summary

    def test_flush_noop_when_batch_empty(self):
        """Flushing before any tool event sends nothing to the parent."""
        agent, relay = self._gateway_parent()

        child_cb = _build_child_progress_callback(0, "test goal", agent)
        child_cb._flush()

        relay.assert_not_called()

    def test_flush_noop_when_no_parent_callback(self):
        """Flushing on the spinner-only path completes without raising."""
        sink = io.StringIO()
        spin = KawaiiSpinner("test")
        spin._out = sink
        spin.running = True

        agent = MagicMock()
        agent._delegate_spinner = spin
        agent.tool_progress_callback = None

        child_cb = _build_child_progress_callback(0, "test goal", agent)
        child_cb("tool.started", "web_search", "test", {})
        # No assertion needed: the test passes as long as this does not raise.
        child_cb._flush()
385  
386  
if __name__ == "__main__":
    # Allow running this file directly. pytest.main() returns the run's exit
    # code instead of exiting itself, so hand it to SystemExit; otherwise the
    # script would report success (status 0) even when tests fail.
    raise SystemExit(pytest.main([__file__, "-v"]))
389