# test_background_review_summary.py
"""Tests for AIAgent._summarize_background_review_actions.

Regression coverage for issue #14944: the background memory/skill review used
to re-surface tool results that were already present in the conversation
history before the review started (e.g. an earlier "Cron job '...' created.").
"""

import json

from run_agent import AIAgent


# Short alias for the function under test; every test below calls it as
# ``_summarize(review_messages, prior_snapshot)``.
_summarize = AIAgent._summarize_background_review_actions


def _tool_msg(tool_call_id, payload):
    """Build a ``role == "tool"`` message whose content is *payload* as JSON.

    Args:
        tool_call_id: Value stored under the ``"tool_call_id"`` key.
        payload: JSON-serializable object encoded into ``"content"``.

    Returns:
        A dict with the keys ``"role"``, ``"tool_call_id"`` and ``"content"``.
    """
    return {
        "role": "tool",
        "tool_call_id": tool_call_id,
        "content": json.dumps(payload),
    }


def test_skips_prior_tool_messages_by_tool_call_id():
    """Stale 'created' tool result from prior history must not be re-surfaced."""
    prior_payload = {"success": True, "message": "Cron job 'remind-me' created."}
    new_payload = {
        "success": True,
        "message": "Entry added",
        "target": "user",
    }

    snapshot = [
        {"role": "user", "content": "create a reminder"},
        _tool_msg("call_old", prior_payload),
        {"role": "assistant", "content": "done"},
    ]
    # The review conversation starts as a copy of the snapshot and then
    # appends the review prompt plus one genuinely new tool result.
    review_messages = list(snapshot) + [
        {"role": "user", "content": "<review prompt>"},
        _tool_msg("call_new", new_payload),
    ]

    actions = _summarize(review_messages, snapshot)

    assert "Cron job 'remind-me' created." not in actions
    assert "User profile updated" in actions


def test_includes_genuinely_new_actions():
    """A successful tool result with an empty prior snapshot is reported."""
    new_payload = {
        "success": True,
        "message": "Memory entry created.",
    }
    review_messages = [_tool_msg("call_new", new_payload)]

    actions = _summarize(review_messages, prior_snapshot=[])

    assert actions == ["Memory entry created."]


def test_falls_back_to_content_equality_when_tool_call_id_missing():
    """If a tool message has no tool_call_id, match prior entries by content."""
    payload = {"success": True, "message": "Cron job 'X' created."}
    raw = json.dumps(payload)
    prior_msg = {"role": "tool", "content": raw}  # no tool_call_id
    review_messages = [
        {"role": "tool", "content": raw},  # same content -> stale, skip
        _tool_msg("call_new", {"success": True, "message": "Skill created."}),
    ]

    actions = _summarize(review_messages, [prior_msg])

    assert "Cron job 'X' created." not in actions
    assert "Skill created." in actions


def test_ignores_failed_tool_results():
    """A result with ``success`` False yields no action, whatever its message."""
    bad = {"success": False, "message": "something created but failed"}
    review_messages = [_tool_msg("call_new", bad)]

    actions = _summarize(review_messages, [])

    assert actions == []


def test_handles_non_json_tool_content_gracefully():
    """Non-JSON tool content is skipped; a valid sibling is still summarized."""
    review_messages = [
        {"role": "tool", "tool_call_id": "x", "content": "not-json"},
        _tool_msg("call_y", {"success": True, "message": "Memory updated."}),
    ]

    actions = _summarize(review_messages, [])

    assert actions == ["Memory updated."]


def test_empty_inputs():
    """Empty lists and ``None`` arguments both produce an empty result."""
    assert _summarize([], []) == []
    assert _summarize(None, None) == []


def test_added_message_relabels_by_target():
    """An 'added' message with target "memory" is reported as "Memory updated"."""
    review_messages = [
        _tool_msg(
            "c1",
            {"success": True, "message": "Entry added to store.", "target": "memory"},
        )
    ]

    actions = _summarize(review_messages, [])

    assert actions == ["Memory updated"]


def test_removed_or_replaced_relabels_by_target():
    """'removed' / 'replaced' messages are relabelled according to their target."""
    review_messages = [
        _tool_msg(
            "c1",
            {"success": True, "message": "Entry removed.", "target": "user"},
        ),
        _tool_msg(
            "c2",
            {"success": True, "message": "Entry replaced.", "target": "memory"},
        ),
    ]

    actions = _summarize(review_messages, [])

    assert "User profile updated" in actions
    assert "Memory updated" in actions