/ internal / daemon / checkpoint_crash_test.go
checkpoint_crash_test.go
  1  package daemon
  2  
  3  import (
  4  	"testing"
  5  	"time"
  6  
  7  	"github.com/Kocoro-lab/ShanClaw/internal/agent"
  8  	"github.com/Kocoro-lab/ShanClaw/internal/client"
  9  	"github.com/Kocoro-lab/ShanClaw/internal/session"
 10  )
 11  
 12  // TestCheckpoint_CrashRecovery_DiskLevel simulates a daemon crash between
 13  // a mid-turn checkpoint and the final save, then reloads the session
 14  // from disk and asserts the partial state is preserved with
 15  // InProgress=true. This is the end-to-end disk-level guarantee of
 16  // Slice 4 that can't be exercised via Desktop UI without actually
 17  // force-quitting at the right moment.
 18  func TestCheckpoint_CrashRecovery_DiskLevel(t *testing.T) {
 19  	dir := t.TempDir()
 20  	mgr := session.NewManager(dir)
 21  	defer mgr.Close()
 22  
 23  	// --- Set up an active session and simulate a pre-loop user append. ---
 24  	sess := mgr.NewSession()
 25  	sess.CWD = dir
 26  	sess.Messages = append(sess.Messages,
 27  		client.Message{Role: "user", Content: client.NewTextContent("do thing")},
 28  	)
 29  	sess.MessageMeta = append(sess.MessageMeta,
 30  		session.MessageMeta{Source: "test", Timestamp: session.TimePtr(time.Now())},
 31  	)
 32  	if err := mgr.Save(); err != nil {
 33  		t.Fatalf("pre-turn save: %v", err)
 34  	}
 35  
 36  	// --- Turn starts: capture baseline (as the daemon runner does). ---
 37  	base := captureTurnBaseline(sess, "test", true)
 38  
 39  	// --- Fire a mid-turn checkpoint: simulates tool batch completion. ---
 40  	loop := agent.NewAgentLoop(nil, agent.NewToolRegistry(), "m", "", 1, 1, 1, nil, nil, nil)
 41  	agent.SetRunMessagesForTest(loop, []client.Message{
 42  		{Role: "user", Content: client.NewTextContent("do thing")},
 43  		{Role: "assistant", Content: client.NewTextContent("[tool_use]")},
 44  		{Role: "user", Content: client.NewTextContent("[tool_result payload]")},
 45  	})
 46  	applyTurnState(sess, loop, nil, base) // no usage provider → messages only
 47  	sess.InProgress = true
 48  	if err := mgr.Save(); err != nil {
 49  		t.Fatalf("mid-turn checkpoint save: %v", err)
 50  	}
 51  
 52  	// --- DAEMON CRASHES HERE. No final save. ---
 53  	sessionID := sess.ID
 54  	mgr.Close() // drops in-memory state
 55  
 56  	// --- Recovery: reload manager + session from disk. ---
 57  	mgr2 := session.NewManager(dir)
 58  	defer mgr2.Close()
 59  	reloaded, err := mgr2.Load(sessionID)
 60  	if err != nil {
 61  		t.Fatalf("reload: %v", err)
 62  	}
 63  
 64  	// 1. InProgress flag survives on disk.
 65  	if !reloaded.InProgress {
 66  		t.Fatal("expected InProgress=true on crash-recovered session — partial state would be invisible")
 67  	}
 68  
 69  	// 2. Partial transcript is preserved (baseline + tool batch).
 70  	if got := len(reloaded.Messages); got != 3 {
 71  		t.Fatalf("want 3 messages (1 baseline + 2 tool batch), got %d", got)
 72  	}
 73  	if reloaded.Messages[1].Content.Text() != "[tool_use]" {
 74  		t.Fatalf("tool_use message missing or wrong: %q", reloaded.Messages[1].Content.Text())
 75  	}
 76  	if reloaded.Messages[2].Content.Text() != "[tool_result payload]" {
 77  		t.Fatalf("tool_result payload lost")
 78  	}
 79  
 80  	// 3. MessageMeta tracks messages (no drift).
 81  	if len(reloaded.MessageMeta) != len(reloaded.Messages) {
 82  		t.Fatalf("meta drift: %d messages vs %d meta", len(reloaded.Messages), len(reloaded.MessageMeta))
 83  	}
 84  }
 85  
 86  // TestCheckpoint_ResumeAfterCrash_FinalSaveClears is the companion:
 87  // the resumed session, once it completes its next turn cleanly, must
 88  // end with InProgress=false — proving the flag is a reliable signal
 89  // (not a sticky one-way marker).
 90  func TestCheckpoint_ResumeAfterCrash_FinalSaveClears(t *testing.T) {
 91  	dir := t.TempDir()
 92  	mgr := session.NewManager(dir)
 93  	defer mgr.Close()
 94  
 95  	// Simulate a previously-crashed session on disk.
 96  	sess := mgr.NewSession()
 97  	sess.CWD = dir
 98  	sess.InProgress = true
 99  	sess.Messages = []client.Message{
100  		{Role: "user", Content: client.NewTextContent("earlier prompt")},
101  		{Role: "assistant", Content: client.NewTextContent("[partial]")},
102  	}
103  	sess.MessageMeta = []session.MessageMeta{
104  		{Source: "test"}, {Source: "test"},
105  	}
106  	if err := mgr.Save(); err != nil {
107  		t.Fatalf("save crashed state: %v", err)
108  	}
109  	sessID := sess.ID
110  	mgr.Close()
111  
112  	// Reload and run a fresh clean turn.
113  	mgr2 := session.NewManager(dir)
114  	defer mgr2.Close()
115  	_, err := mgr2.Resume(sessID)
116  	if err != nil {
117  		t.Fatalf("resume: %v", err)
118  	}
119  	current := mgr2.Current()
120  	if !current.InProgress {
121  		t.Fatal("resumed session should start with InProgress=true from disk")
122  	}
123  
124  	// Daemon runs a successful turn — the final-save path clears the flag.
125  	current.InProgress = false
126  	if err := mgr2.Save(); err != nil {
127  		t.Fatalf("clean final save: %v", err)
128  	}
129  
130  	// Reload once more to prove the flag went to disk.
131  	mgr3 := session.NewManager(dir)
132  	defer mgr3.Close()
133  	final, err := mgr3.Load(sessID)
134  	if err != nil {
135  		t.Fatalf("final reload: %v", err)
136  	}
137  	if final.InProgress {
138  		t.Fatal("InProgress=true persisted across clean final save — flag is sticky (bug)")
139  	}
140  }