/ test / vision_test.go
vision_test.go
 1  package test
 2  
 3  import (
 4  	"context"
 5  	"fmt"
 6  	"testing"
 7  
 8  	"github.com/Kocoro-lab/ShanClaw/internal/tools"
 9  )
10  
11  func TestVisionLoop_ScreenshotReturnsImage(t *testing.T) {
12  	st := &tools.ScreenshotTool{}
13  	result, err := st.Run(context.Background(), `{"target":"fullscreen"}`)
14  	if err != nil {
15  		t.Fatalf("screenshot error: %v", err)
16  	}
17  	if result.IsError {
18  		t.Fatalf("screenshot failed: %s", result.Content)
19  	}
20  	t.Logf("Content: %s", result.Content)
21  	if len(result.Images) == 0 {
22  		t.Fatal("expected at least 1 image block, got 0")
23  	}
24  	img := result.Images[0]
25  	if img.MediaType != "image/png" {
26  		t.Errorf("expected image/png, got %s", img.MediaType)
27  	}
28  	rawBytes := len(img.Data) * 3 / 4
29  	t.Logf("Image: %s, %d KB base64", img.MediaType, rawBytes/1024)
30  	if rawBytes < 1000 {
31  		t.Error("image seems too small — resize may have failed")
32  	}
33  }
34  
35  func TestVisionLoop_ComputerScreenshotAction(t *testing.T) {
36  	// Screenshot action doesn't use ax_server, so nil client is fine
37  	reg, _, cleanup := tools.RegisterLocalTools(nil, nil)
38  	defer cleanup()
39  	ct, _ := reg.Get("computer")
40  	result, err := ct.Run(context.Background(), `{"action":"screenshot"}`)
41  	if err != nil {
42  		t.Fatalf("computer screenshot error: %v", err)
43  	}
44  	if result.IsError {
45  		t.Fatalf("computer screenshot failed: %s", result.Content)
46  	}
47  	t.Logf("Content: %s", result.Content)
48  	if len(result.Images) == 0 {
49  		t.Fatal("expected image from computer screenshot action")
50  	}
51  	t.Logf("Image: %d KB", len(result.Images[0].Data)*3/4/1024)
52  }
53  
54  func TestVisionLoop_ComputerNativeLeftClick(t *testing.T) {
55  	// Test that Anthropic native left_click with coordinate array parses correctly
56  	// Will fail with ax_server error since we're not in a real GUI context,
57  	// but that's fine — it means the action was correctly mapped to "click"
58  	reg, _, cleanup := tools.RegisterLocalTools(nil, nil)
59  	defer cleanup()
60  	ct, _ := reg.Get("computer")
61  	result, err := ct.Run(context.Background(), `{"action":"left_click","coordinate":[640,400]}`)
62  	if err != nil {
63  		t.Fatalf("error: %v", err)
64  	}
65  	t.Logf("Result: %s (isError: %v)", result.Content, result.IsError)
66  	if result.IsError && result.Content == `unknown action: "left_click"` {
67  		t.Fatal("left_click was NOT normalized to click — normalizeArgs not called")
68  	}
69  	fmt.Println("left_click correctly mapped to click action")
70  }