test_review_prompt_class_first.py
"""Behavioral checks for the skill-review and combined-review prompts.

The background review agent is steered by these prompts to update the skill
library after most sessions.  The intended order of preference is:

  1. patch a skill that is currently loaded,
  2. otherwise patch an existing umbrella skill,
  3. otherwise add a references/ file beneath an existing umbrella,
  4. only as a last resort create a new class-level umbrella.

Corrections about user preferences (style, format, verbosity, legibility)
count as first-class skill signals rather than memory-only signals.

Each test looks for the presence of a behavioral *instruction* via a few
marker substrings.  The full prompt text is deliberately NOT snapshotted,
since that would turn these tests into change detectors.
"""

from run_agent import AIAgent


# ---------------------------------------------------------------------------
# _SKILL_REVIEW_PROMPT
# ---------------------------------------------------------------------------

def test_skill_review_prompt_biases_toward_active_updates():
    """Updating has to read as the default posture rather than a rarity."""
    text = AIAgent._SKILL_REVIEW_PROMPT
    lowered = text.lower()

    mentions_active = "ACTIVE" in text or "active" in lowered
    assert mentions_active, (
        "must tell the reviewer to be active"
    )

    # Not acting should be described as a "missed learning opportunity"
    # (or some equivalent wording).
    frames_inaction_as_miss = any(
        word in lowered for word in ("missed", "opportunity")
    )
    assert frames_inaction_as_miss, (
        "must frame inaction as a miss, not a neutral outcome"
    )


def test_skill_review_prompt_treats_user_corrections_as_skill_signal():
    """Style/format/verbosity complaints are skill signals, not memory-only."""
    text = AIAgent._SKILL_REVIEW_PROMPT
    lowered = text.lower()

    # At least one member of the style/format/verbosity family is named.
    preference_stems = ("style", "format", "verbos", "legib", "tone")
    assert any(stem in lowered for stem in preference_stems), (
        "must name style/format/verbosity/legibility as signals"
    )

    # The corrections are explicitly labelled first-class skill signals.
    first_class_labels = ("FIRST-CLASS", "first-class")
    assert any(label in text for label in first_class_labels), (
        "must explicitly label user-preference corrections as first-class skill signals"
    )

    # Example correction phrasing is included so the model learns to spot it.
    correction_cues = ("stop doing", "don't", "hate", "frustrat")
    assert any(cue in lowered for cue in correction_cues), (
        "must give concrete phrasing examples so the model recognizes corrections"
    )


def test_skill_review_prompt_prefers_loaded_skills_first():
    """The first patch target is whichever skills are currently loaded."""
    text = AIAgent._SKILL_REVIEW_PROMPT

    assert any(word in text for word in ("LOADED", "loaded")), (
        "must mention currently-loaded skills"
    )

    # Both mechanisms for spotting a loaded skill have to be named.
    assert all(marker in text for marker in ("skill_view", "/skill")), (
        "must name skill_view and /skill-name as loaded-skill signals"
    )


def test_skill_review_prompt_has_four_step_preference_order():
    """The patch / support-file / create ladder is spelled out."""
    text = AIAgent._SKILL_REVIEW_PROMPT

    assert "PATCH" in text
    assert any(marker in text for marker in ("references/", "REFERENCE"))
    assert "CREATE" in text
    assert any(marker in text for marker in ("UMBRELLA", "umbrella"))


def test_skill_review_prompt_names_three_support_file_kinds():
    """The support-file step names references/, templates/ and scripts/."""
    text = AIAgent._SKILL_REVIEW_PROMPT
    lowered = text.lower()

    required_directories = (
        ("references/", "must name references/ as a support-file kind"),
        ("templates/", "must name templates/ as a support-file kind"),
        ("scripts/", "must name scripts/ as a support-file kind"),
    )
    for directory, failure_message in required_directories:
        assert directory in text, failure_message

    # Each kind also needs a hint about what it is for.
    references_hint = (
        any(word in lowered for word in ("knowledge", "research"))
        or "API docs" in text
    )
    assert references_hint, (
        "must mention knowledge-bank / research / API-docs role of references/"
    )

    templates_hint = any(
        word in lowered for word in ("copied", "starter", "reproduce")
    )
    assert templates_hint, (
        "must mention that templates/ are starter files to copy/modify"
    )

    scripts_hint = any(
        word in lowered for word in ("re-runnable", "verification", "probe")
    )
    assert scripts_hint, (
        "must mention that scripts/ are re-runnable actions"
    )


def test_skill_review_prompt_has_name_veto_for_create():
    """New-skill creation is gated on a class-level name."""
    text = AIAgent._SKILL_REVIEW_PROMPT

    has_class_level_wording = (
        "class level" in text.lower() or "CLASS-LEVEL" in text
    )
    assert has_class_level_wording

    assert any(phrase in text for phrase in ("MUST NOT", "must not")), (
        "must have a name-veto clause blocking session-artifact names"
    )


def test_skill_review_prompt_embeds_user_preferences_in_skills():
    """User-preference lessons belong in SKILL.md too, not only in memory."""
    lowered = AIAgent._SKILL_REVIEW_PROMPT.lower()

    assert "preference" in lowered, "must mention user preferences"
    assert all(word in lowered for word in ("memory", "skill")), (
        "must contrast memory vs skill responsibilities"
    )


def test_skill_review_prompt_flags_overlap_and_defers_to_curator():
    """Overlap gets flagged for the curator instead of consolidated live."""
    text = AIAgent._SKILL_REVIEW_PROMPT

    assert "overlap" in text.lower()
    assert "curator" in text.lower(), "must defer consolidation to the curator"


def test_skill_review_prompt_still_has_opt_out_clause():
    """'Nothing to save.' stays available as a real, non-default answer."""
    text = AIAgent._SKILL_REVIEW_PROMPT

    assert "Nothing to save." in text


# ---------------------------------------------------------------------------
# _COMBINED_REVIEW_PROMPT
# ---------------------------------------------------------------------------

def test_combined_review_prompt_has_memory_section():
    """The Memory half is still present and points at the memory tool."""
    text = AIAgent._COMBINED_REVIEW_PROMPT

    assert "**Memory**" in text
    assert "memory tool" in text


def test_combined_review_prompt_skills_biased_toward_active_updates():
    """The Skills half carries the same active-update bias."""
    text = AIAgent._COMBINED_REVIEW_PROMPT
    lowered = text.lower()

    assert "**Skills**" in text
    assert "ACTIVE" in text or "active" in lowered
    assert any(word in lowered for word in ("missed", "opportunity"))


def test_combined_review_prompt_treats_user_corrections_as_skill_signal():
    """The user-preference-is-a-skill-signal rule also applies here."""
    text = AIAgent._COMBINED_REVIEW_PROMPT
    lowered = text.lower()

    preference_stems = ("style", "format", "verbos", "legib", "tone")
    assert any(stem in lowered for stem in preference_stems)
    assert any(label in text for label in ("FIRST-CLASS", "first-class"))


def test_combined_review_prompt_prefers_loaded_skills_first():
    """Loaded skills come first in the combined prompt as well."""
    text = AIAgent._COMBINED_REVIEW_PROMPT

    assert any(word in text for word in ("LOADED", "loaded"))
    assert all(marker in text for marker in ("skill_view", "/skill"))


def test_combined_review_prompt_has_four_step_skill_ladder():
    """The Skills half keeps the patch / support-file / create ladder."""
    text = AIAgent._COMBINED_REVIEW_PROMPT

    assert "PATCH" in text
    assert any(marker in text for marker in ("references/", "REFERENCE"))
    assert "CREATE" in text

    has_class_level_wording = (
        any(marker in text for marker in ("CLASS-LEVEL", "class-level"))
        or "class level" in text.lower()
    )
    assert has_class_level_wording


def test_combined_review_prompt_names_three_support_file_kinds():
    """All three support-file kinds are named in the combined prompt."""
    text = AIAgent._COMBINED_REVIEW_PROMPT

    for directory in ("references/", "templates/", "scripts/"):
        assert directory in text


def test_combined_review_prompt_preserves_opt_out_clause():
    """The 'Nothing to save.' opt-out is present in the combined prompt."""
    text = AIAgent._COMBINED_REVIEW_PROMPT

    assert "Nothing to save." in text


# ---------------------------------------------------------------------------
# _MEMORY_REVIEW_PROMPT — unchanged, still memory-focused
# ---------------------------------------------------------------------------

def test_memory_review_prompt_still_focused_on_user_facts():
    """The memory-only prompt stays about user facts and avoids skill terms."""
    text = AIAgent._MEMORY_REVIEW_PROMPT

    # Skill-library vocabulary must not leak into the memory-only prompt.
    for skill_only_marker in ("skills_list", "SURVEY"):
        assert skill_only_marker not in text
    assert "memory tool" in text