live_test.go
1 package e2e 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "net/http" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "strings" 12 "testing" 13 "time" 14 ) 15 16 // Live E2E tests require SHANNON_E2E_LIVE=1. 17 // They make real LLM API calls and cost real tokens. 18 // 19 // Known limitation: daemon tests use the real ~/.shannon home and port 7533. 20 // Do not run while a real daemon is active. Future improvement: temp HOME + 21 // isolated port via env var override. 22 23 func TestLive_OneShot_BasicQuery(t *testing.T) { 24 skipUnlessLive(t) 25 bin := testBinary(t) 26 27 out := runShan(t, bin, "what is 2+1") 28 if !strings.Contains(out, "3") { 29 t.Errorf("expected answer containing '3', got: %s", out) 30 } 31 // Should use Anthropic model, not GPT fallback 32 if strings.Contains(out, "gpt-5-mini") { 33 t.Error("should not fall back to gpt-5-mini — check cache_break fix") 34 } 35 } 36 37 func TestLive_OneShot_AutoApproveToolUse(t *testing.T) { 38 skipUnlessLive(t) 39 bin := testBinary(t) 40 41 out := runShan(t, bin, "-y", "list files in the current directory") 42 if !strings.Contains(out, "directory_list") && !strings.Contains(out, "bash") { 43 t.Error("expected tool call (directory_list or bash)") 44 } 45 } 46 47 func TestLive_OneShot_SessionCWD(t *testing.T) { 48 skipUnlessLive(t) 49 bin := testBinary(t) 50 51 tmpDir := t.TempDir() 52 cmd := exec.Command(bin, "-y", "run pwd") 53 cmd.Dir = tmpDir 54 var stdout bytes.Buffer 55 cmd.Stdout = &stdout 56 cmd.Stderr = &stdout 57 if err := cmd.Run(); err != nil { 58 t.Fatalf("shan failed: %v\n%s", err, stdout.String()) 59 } 60 61 // Compare against the actual directory we set, resolving symlinks 62 // (macOS: /tmp → /private/tmp, /var → /private/var) 63 expected, _ := filepath.EvalSymlinks(tmpDir) 64 out := stdout.String() 65 if !strings.Contains(out, expected) && !strings.Contains(out, tmpDir) { 66 t.Errorf("expected CWD %q or %q in output, got: %s", expected, tmpDir, out) 67 } 68 } 69 70 func TestLive_BundledAgent_Explorer(t *testing.T) { 71 skipUnlessLive(t) 72 bin := testBinary(t) 73 74 out := runShan(t, bin, "--agent", "explorer", "what files are in this project") 75 // Explorer should use read-only tools 76 if strings.Contains(out, "file_write") || strings.Contains(out, "file_edit") { 77 t.Error("explorer should not use write tools") 78 } 79 } 80 81 func TestLive_BundledAgent_Reviewer(t *testing.T) { 82 skipUnlessLive(t) 83 bin := testBinary(t) 84 85 out := runShan(t, bin, "--agent", "reviewer", "review main.go") 86 if !strings.Contains(out, "file_read") { 87 t.Error("reviewer should read files") 88 } 89 } 90 91 func TestLive_Daemon_MessageAndEditRetry(t *testing.T) { 92 skipUnlessLive(t) 93 t.Skip("daemon tests use real ~/.shannon and port 7533 — skipped until daemon supports --port/--home isolation") 94 bin := testBinary(t) 95 96 // Start daemon 97 daemonCmd := exec.Command(bin, "daemon", "start") 98 daemonCmd.Stdout = os.Stderr 99 daemonCmd.Stderr = os.Stderr 100 if err := daemonCmd.Start(); err != nil { 101 t.Fatalf("daemon start: %v", err) 102 } 103 defer func() { 104 exec.Command(bin, "daemon", "stop").Run() 105 daemonCmd.Wait() 106 }() 107 108 // Wait for daemon to be ready 109 waitForDaemon(t, 10*time.Second) 110 111 // Send message 112 resp := httpPost(t, "http://localhost:7533/message", map[string]interface{}{ 113 "text": "what is 7+7", 114 }) 115 sessionID, ok := resp["session_id"].(string) 116 if !ok || sessionID == "" { 117 t.Fatalf("no session_id in response: %v", resp) 118 } 119 reply, _ := resp["reply"].(string) 120 if !strings.Contains(reply, "14") { 121 t.Errorf("expected 14 in reply, got: %s", reply) 122 } 123 124 // GET session 125 sessResp := httpGet(t, fmt.Sprintf("http://localhost:7533/sessions/%s", sessionID)) 126 messages, ok := sessResp["messages"].([]interface{}) 127 if !ok || len(messages) < 2 { 128 t.Fatalf("expected at least 2 messages, got: %v", sessResp) 129 } 130 131 // Edit & retry 132 editResp := httpPost(t, fmt.Sprintf("http://localhost:7533/sessions/%s/edit", sessionID), map[string]interface{}{ 133 "message_index": 0, 134 "new_content": "what is 9+9", 135 }) 136 editReply, _ := editResp["reply"].(string) 137 if !strings.Contains(editReply, "18") { 138 t.Errorf("expected 18 in edit reply, got: %s", editReply) 139 } 140 141 // Verify truncation 142 sessResp2 := httpGet(t, fmt.Sprintf("http://localhost:7533/sessions/%s", sessionID)) 143 messages2, _ := sessResp2["messages"].([]interface{}) 144 if len(messages2) != 2 { 145 t.Errorf("expected 2 messages after edit, got %d", len(messages2)) 146 } 147 } 148 149 func TestLive_Daemon_AgentListIncludesBuiltins(t *testing.T) { 150 skipUnlessLive(t) 151 t.Skip("daemon tests use real ~/.shannon and port 7533 — skipped until daemon supports --port/--home isolation") 152 bin := testBinary(t) 153 154 daemonCmd := exec.Command(bin, "daemon", "start") 155 daemonCmd.Stdout = os.Stderr 156 daemonCmd.Stderr = os.Stderr 157 if err := daemonCmd.Start(); err != nil { 158 t.Fatalf("daemon start: %v", err) 159 } 160 defer func() { 161 exec.Command(bin, "daemon", "stop").Run() 162 daemonCmd.Wait() 163 }() 164 165 waitForDaemon(t, 10*time.Second) 166 167 resp := httpGet(t, "http://localhost:7533/agents") 168 agentsList, ok := resp["agents"].([]interface{}) 169 if !ok { 170 t.Fatalf("expected agents array: %v", resp) 171 } 172 173 builtins := map[string]bool{} 174 for _, a := range agentsList { 175 m, _ := a.(map[string]interface{}) 176 if b, _ := m["builtin"].(bool); b { 177 builtins[m["name"].(string)] = true 178 } 179 } 180 for _, name := range []string{"explorer", "reviewer"} { 181 if !builtins[name] { 182 t.Errorf("expected builtin agent %q", name) 183 } 184 } 185 } 186 187 // ---------- helpers ---------- 188 189 func runShan(t *testing.T, bin string, args ...string) string { 190 t.Helper() 191 cmd := exec.Command(bin, args...) 192 var stdout bytes.Buffer 193 cmd.Stdout = &stdout 194 cmd.Stderr = &stdout 195 if err := cmd.Run(); err != nil { 196 t.Fatalf("shan %v failed: %v\n%s", args, err, stdout.String()) 197 } 198 return stdout.String() 199 } 200 201 func waitForDaemon(t *testing.T, timeout time.Duration) { 202 t.Helper() 203 deadline := time.Now().Add(timeout) 204 for time.Now().Before(deadline) { 205 resp, err := http.Get("http://localhost:7533/health") 206 if err == nil { 207 resp.Body.Close() 208 if resp.StatusCode == 200 { 209 return 210 } 211 } 212 time.Sleep(500 * time.Millisecond) 213 } 214 t.Fatal("daemon did not become ready within timeout") 215 } 216 217 func httpGet(t *testing.T, url string) map[string]interface{} { 218 t.Helper() 219 resp, err := http.Get(url) 220 if err != nil { 221 t.Fatalf("GET %s: %v", url, err) 222 } 223 defer resp.Body.Close() 224 var result map[string]interface{} 225 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 226 t.Fatalf("decode GET %s: %v", url, err) 227 } 228 return result 229 } 230 231 func httpPost(t *testing.T, url string, body map[string]interface{}) map[string]interface{} { 232 t.Helper() 233 b, _ := json.Marshal(body) 234 resp, err := http.Post(url, "application/json", bytes.NewReader(b)) 235 if err != nil { 236 t.Fatalf("POST %s: %v", url, err) 237 } 238 defer resp.Body.Close() 239 var result map[string]interface{} 240 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { 241 t.Fatalf("decode POST %s: %v", url, err) 242 } 243 return result 244 }