/ internal / prompt / builder.go
builder.go
  1  package prompt
  2  
  3  import (
  4  	"fmt"
  5  	"runtime"
  6  	"strings"
  7  	"time"
  8  
  9  	"github.com/Kocoro-lab/ShanClaw/internal/skills"
 10  )
 11  
// Layer character budgets. Both limits are enforced through truncate, which
// counts runes rather than bytes.
const (
	maxMemoryChars       = 2000  // cap on the Memory block rendered by buildVolatileContext
	maxInstructionsChars = 16000 // cap on the Instructions block rendered by buildStableContext
)
 17  
// DeferredToolSummary is a lightweight name+description pair for deferred tool listings.
// Mirrors agent.ToolSummary but avoids importing the agent package from prompt.
// Each entry becomes one "- name: description" bullet in the Deferred Tools
// section of the static system prompt.
type DeferredToolSummary struct {
	Name        string // tool name, rendered verbatim
	Description string // tool description; long descriptions are shortened to a brief preview when rendered
}
 24  
// PromptOptions configures the system prompt assembly.
//
// Each field feeds exactly one of the three layers produced by
// BuildSystemPrompt (System, StableContext, VolatileContext); the per-field
// comments say which. Skills is carried on the struct but is not rendered by
// any builder in this file — the skill listing is placed in the user message
// elsewhere to keep the cached system prefix stable.
type PromptOptions struct {
	BasePrompt   string   // persona + core operational rules; written verbatim at the top of System
	Memory       string   // from LoadMemory (~500 tokens budget) — trimmed, capped at maxMemoryChars, rendered in VolatileContext
	Instructions string   // from LoadInstructions (~4000 tokens budget) — trimmed, capped at maxInstructionsChars, rendered in StableContext so it joins the cacheable prefix
	ToolNames    []string // from ToolRegistry.SortedNames(), deterministic; listed in System
	ServerTools  []string // server tool names (optional); listed in System after ToolNames
	MCPContext   string   // context from MCP servers (auth info, usage hints); rendered in VolatileContext
	Skills       []*skills.Skill
	CWD          string // current working directory; rendered in VolatileContext when non-empty
	SessionInfo  string // optional session context, appended to the Context section of VolatileContext when non-empty (currently unused by agent loop)
	MemoryDir    string // directory containing MEMORY.md for agent memory writes; a non-empty value enables the Memory Persistence guidance in System
	// StickyContext holds session-scoped facts injected verbatim into StableContext.
	// Never truncated or compacted. Use for key transactional data (IDs, amounts, names)
	// that must survive context compaction. Populated by the daemon runner with session
	// source/channel/task metadata, or by callers needing persistent session facts.
	StickyContext string
	// DeferredTools lists tools available via tool_search (deferred mode only).
	// Rendered in the static system prompt. Empty when not in deferred mode.
	DeferredTools []DeferredToolSummary
	// ModelID is the model identifier (e.g., "claude-sonnet-4-20250514").
	// Injected into volatile context so the model knows its own identity.
	ModelID string
	// ContextWindow is the model's context window size in tokens.
	// Injected into volatile context when > 0.
	ContextWindow int
	// OutputFormat controls formatting guidance: "markdown" (default, GFM) or
	// "plain" (for cloud-distributed sessions where Shannon Cloud handles
	// final channel rendering). Empty defaults to "markdown"; any value other
	// than "plain" is treated as markdown.
	OutputFormat string
}
 56  
// PromptParts separates the system prompt into cacheable and volatile sections.
// The gateway caches System as a single block. StableContext and VolatileContext
// are injected into the user message with a <!-- cache_break --> separator.
//
// Layer semantics:
//   - System: persona, core rules, language policy, tool names, deferred tool
//     listing, macOS automation and memory-persistence guidance —
//     gateway-cached. Skills are NOT part of System; they are listed in the
//     user message so the cached prefix stays stable.
//   - StableContext: shared org-wide instructions (instructions.md + rules/*.md +
//     project overrides) and sticky session facts. Changes only across sessions
//     or on file edits. Sits before the cache_break marker in the user message
//     so providers that reuse the pre-break prefix can hit on it. Never empty:
//     a fixed placeholder is emitted when there is nothing else to render.
//   - VolatileContext: date/time, CWD, model identity, session info, output
//     format guidance, the per-turn language reminder, memory (mutated by
//     memory_append mid-session), and MCP server context. Sits after the
//     cache_break marker and is re-sent each turn.
type PromptParts struct {
	System          string // static: persona + rules + guidance + tool names (cached by gateway)
	StableContext   string // per-session cacheable prefix: shared instructions + sticky facts (before cache_break)
	VolatileContext string // changes per-turn: date/time, CWD, model, format guidance, language reminder, memory, MCP (after cache_break)
}
 76  
 77  // BuildSystemPrompt assembles prompt parts from layers.
 78  // System contains only content that is stable across turns.
 79  // Shared instructions and sticky facts go to StableContext (cached prefix).
 80  // Volatile content (memory, date/time, CWD, MCP) goes to VolatileContext.
 81  //
 82  // Note: an attempt to move VolatileContext into System (after a
 83  // `<!-- volatile -->` marker) was reverted — it caused tools cache to break
 84  // every minute because the system_volatile bytes sit BEFORE the tools
 85  // cache_control. Baseline placement (volatile in user_1 after cache_break) is
 86  // actually optimal: it only pollutes the rolling marker cache, leaving system
 87  // + tools + user_1.stable caches intact.
 88  func BuildSystemPrompt(opts PromptOptions) PromptParts {
 89  	system := buildStaticSystem(opts)
 90  	stable := buildStableContext(opts)
 91  	volatile := buildVolatileContext(opts)
 92  	return PromptParts{
 93  		System:          system,
 94  		StableContext:   stable,
 95  		VolatileContext: volatile,
 96  	}
 97  }
 98  
 99  // buildStaticSystem assembles content that never changes between turns in a session.
100  func buildStaticSystem(opts PromptOptions) string {
101  	var sb strings.Builder
102  
103  	// 1. Base prompt (persona + core rules — unlimited)
104  	sb.WriteString(opts.BasePrompt)
105  
106  	// Language policy. Byte-stable across all sessions and users so it joins
107  	// the cacheable system prefix. Pairs with the shorter per-turn reminder in
108  	// VolatileContext which re-anchors the rule against long-session drift.
109  	sb.WriteString("\n\n## Language\n")
110  	sb.WriteString("Match the user's language on first contact and stay consistent for the rest of the session. " +
111  		"If the user writes primarily in Chinese, respond in Chinese; if in English, respond in English; " +
112  		"follow the same rule for any other language. Only switch response language when the user explicitly asks " +
113  		"(e.g. \"please reply in English\"). Mixed-language user input — such as one English technical term inside a " +
114  		"Chinese sentence — is NOT a language-switch signal; continue in the established language. " +
115  		"Code identifiers, file paths, CLI commands, and technical terms (API names, library names, error messages) " +
116  		"remain in their original form regardless of response language. " +
117  		"Maintain full orthographic correctness — all accents, diacritics, and special characters.")
118  
119  	// 2. Available Tools (stable once session starts)
120  	sb.WriteString("\n\n## Available Tools\n")
121  	if len(opts.ToolNames) > 0 {
122  		sb.WriteString("You have these tools: ")
123  		sb.WriteString(strings.Join(opts.ToolNames, ", "))
124  		sb.WriteString(".")
125  	}
126  	if len(opts.ServerTools) > 0 {
127  		if len(opts.ToolNames) > 0 {
128  			sb.WriteString("\n")
129  		}
130  		sb.WriteString("You also have server-side tools: ")
131  		sb.WriteString(strings.Join(opts.ServerTools, ", "))
132  		sb.WriteString(".")
133  	}
134  
135  	// Parallel tool-use nudge: agent loops that fire N tool calls across N
136  	// iterations grow msgs past Anthropic's ~20-block auto-lookback window,
137  	// causing CHR decay in long sessions. Batching independent calls into
138  	// ONE response collapses N iterations → 1, keeping the rolling marker
139  	// reachable. Only add when tools are actually registered — tool-less
140  	// agents would just pay extra cached-prefix tokens.
141  	if len(opts.ToolNames) > 0 || len(opts.ServerTools) > 0 {
142  		sb.WriteString("\n\nWhen you need independent pieces of information " +
143  			"(read multiple files, check several conditions, fetch data from different sources), " +
144  			"prefer calling ALL the tools in a SINGLE response with multiple parallel tool_use blocks " +
145  			"rather than across sequential turns. This amortizes prompt-cache cost and reduces latency.\n" +
146  			"Example — INEFFICIENT (3 turns):\n" +
147  			"  turn 1: file_read A\n" +
148  			"  turn 2: file_read B\n" +
149  			"  turn 3: file_read C\n" +
150  			"Example — EFFICIENT (1 turn, 3 parallel tool_use blocks in one response):\n" +
151  			"  turn 1: file_read A + file_read B + file_read C\n" +
152  			"Only sequence when later calls genuinely depend on earlier results.")
153  	}
154  
155  	// Skills are listed in the user message (not system prompt) to preserve
156  	// cache prefix stability. See buildSkillListing() in loop.go.
157  
158  	// 3b. Deferred Tools (only in deferred mode) — name + truncated description.
159  	// Model calls tool_search to load full schemas on demand.
160  	if len(opts.DeferredTools) > 0 {
161  		sb.WriteString("\n\n## Deferred Tools\n")
162  		sb.WriteString("Load via `tool_search` when needed, then immediately call the loaded tool.\n")
163  		for _, dt := range opts.DeferredTools {
164  			desc := dt.Description
165  			if len(desc) > 60 {
166  				desc = desc[:57] + "..."
167  			}
168  			fmt.Fprintf(&sb, "- %s: %s\n", dt.Name, desc)
169  		}
170  	}
171  
172  	// 4. macOS automation guidance (only on darwin with relevant tools)
173  	if guidance := macOSAutomationGuidance(opts.ToolNames); guidance != "" {
174  		sb.WriteString("\n\n")
175  		sb.WriteString(guidance)
176  	}
177  
178  	// 5. Memory Persistence guidance (stable — depends only on memoryDir presence)
179  	if opts.MemoryDir != "" {
180  		sb.WriteString("\n\n## Memory Persistence\n")
181  		sb.WriteString("Your current memory is shown in the context section below. When you discover something worth remembering across future conversations, use the `memory_append` tool to add new entries.\n")
182  		sb.WriteString("IMPORTANT: NEVER use file_write or file_edit on MEMORY.md — they race under concurrent sessions. The memory_append tool is flock-protected and safe.\n")
183  		sb.WriteString("Good candidates for memory:\n")
184  		sb.WriteString("- Decisions the user made (technical, design, or preferences)\n")
185  		sb.WriteString("- User corrections about how they want to work\n")
186  		sb.WriteString("- Important facts about projects, people, or systems\n")
187  		sb.WriteString("- Patterns, gotchas, or insights you discovered together\n")
188  		sb.WriteString("- Configuration or reference information that was hard to find\n\n")
189  		sb.WriteString("Keep entries as short one-line bullets. Do NOT save ephemeral task status, code snippets, or things already documented in project files. Your context is automatically compacted in long sessions — anything not written to memory may be lost.")
190  	}
191  
192  	return sb.String()
193  }
194  
195  // buildStableContext assembles the cacheable per-session prefix: shared
196  // instructions followed by sticky session facts. Placed before the
197  // <!-- cache_break --> marker in the user message so providers that reuse the
198  // pre-break prefix have a chance to cache-hit on it within a session.
199  //
200  // Ordering: instructions come first because they're the more stable of the
201  // two — file-backed and rarely edited — while sticky facts vary per session
202  // source. Putting the stabler content first gives the gateway/provider more
203  // opportunity to extend a cached prefix. Whether that actually produces a
204  // cross-session cache hit depends on upstream gateway/provider behavior and
205  // on the rest of the prompt state matching, not just the instructions text.
206  //
207  // Truncation: shared instructions are bounded by maxInstructionsChars to keep
208  // the cached prefix within a predictable budget. Oversized content is trimmed
209  // with a [truncated] marker telling the author to reduce file content.
210  func buildStableContext(opts PromptOptions) string {
211  	var sb strings.Builder
212  
213  	if inst := strings.TrimSpace(opts.Instructions); inst != "" {
214  		sb.WriteString("## Instructions\n")
215  		sb.WriteString(truncate(inst, maxInstructionsChars))
216  	}
217  
218  	if sticky := strings.TrimSpace(opts.StickyContext); sticky != "" {
219  		if sb.Len() > 0 {
220  			sb.WriteString("\n\n")
221  		}
222  		sb.WriteString("## Session Facts\n")
223  		sb.WriteString(sticky)
224  	}
225  
226  	// Guarantee a non-empty stable prefix so the gateway attaches a third
227  	// cache_control breakpoint (on the user message stable block). When this
228  	// is empty the gateway's Anthropic provider falls through its
229  	// empty-text-block guard and skips the breakpoint entirely, leaving the
230  	// user message uncached. The literal text is stable across all sessions
231  	// (no time, no IDs) so the extra bytes go into a shareable cached prefix.
232  	if sb.Len() == 0 {
233  		sb.WriteString("## Session\nActive agent context.")
234  	}
235  
236  	return sb.String()
237  }
238  
239  // buildVolatileContext assembles content that changes between turns.
240  // Placed after the <!-- cache_break --> marker in the user message.
241  func buildVolatileContext(opts PromptOptions) string {
242  	var sb strings.Builder
243  
244  	// Date/time + CWD + model identity + session info
245  	sb.WriteString("## Context\n")
246  	sb.WriteString("Current date: " + time.Now().Format("2006-01-02 15:04 MST"))
247  	if opts.CWD != "" {
248  		sb.WriteString("\nWorking directory: " + opts.CWD)
249  	}
250  	if opts.ModelID != "" {
251  		sb.WriteString("\nModel: " + opts.ModelID)
252  	}
253  	if opts.ContextWindow > 0 {
254  		sb.WriteString(fmt.Sprintf("\nContext window: %d tokens", opts.ContextWindow))
255  	}
256  	if opts.SessionInfo != "" {
257  		sb.WriteString("\n" + opts.SessionInfo)
258  	}
259  
260  	// Output formatting guidance
261  	sb.WriteString("\n\n## Output Format\n")
262  	sb.WriteString(formatGuidance(opts.OutputFormat))
263  
264  	// Per-turn language reminder. Short reinforcement of the full policy in the
265  	// System section. Byte-stable (same text every turn) so it does not fragment
266  	// any per-turn cache, but positioning near the user message anchors against
267  	// drift when long sessions accumulate English tool output.
268  	//
269  	// On turn 0 "the language already established" is vacuous — nothing has been
270  	// established yet. The static System section's "Match the user's language on
271  	// first contact" rule handles that case; this reminder takes over from turn 1
272  	// onward when there's actually an established language to stay consistent with.
273  	sb.WriteString("\n\n## Language\n")
274  	sb.WriteString("Respond in the language already established with the user in this session. " +
275  		"If the user has not asked for a different language, stay consistent — do not switch even when tool output, " +
276  		"skill descriptions, or system messages arrive in a different language. Keep code and technical identifiers in their original form.")
277  
278  	// Memory — stays volatile: memory_append can mutate MEMORY.md during a
279  	// turn, so the block must be re-read and re-sent each Run(). Instructions
280  	// live in StableContext (cacheable prefix), not here.
281  	if mem := strings.TrimSpace(opts.Memory); mem != "" {
282  		sb.WriteString("\n\n## Memory\n")
283  		sb.WriteString(truncate(mem, maxMemoryChars))
284  	}
285  
286  	// MCP server context
287  	if mcp := strings.TrimSpace(opts.MCPContext); mcp != "" {
288  		sb.WriteString("\n\n## MCP Server Context\n")
289  		sb.WriteString(mcp)
290  	}
291  
292  	return sb.String()
293  }
294  
// formatGuidance maps an output profile to its formatting instructions.
// Only "plain" selects the plain-text guidance; every other value, including
// "markdown" and the empty string, yields the GitHub-flavored markdown text.
func formatGuidance(format string) string {
	if format == "plain" {
		return "Format responses as plain text. Use short paragraphs and simple bullet points. " +
			"Avoid markdown tables, fenced code blocks, headers, bold/italic, and other rich formatting. " +
			"Use indentation or blank lines for structure. Keep lines short and readable."
	}
	return "Format text responses using GitHub-flavored markdown (GFM): " +
		"use headers, fenced code blocks with language tags, lists, bold/italic, and tables where appropriate."
}
307  
// truncate limits s to at most maxChars characters, counted in runes so a
// multi-byte character is never split. When s fits it is returned unchanged;
// otherwise the first maxChars runes are kept and "\n[truncated]" is appended.
//
// A negative maxChars is treated as zero instead of panicking on the slice
// expression.
func truncate(s string, maxChars int) string {
	if maxChars < 0 {
		maxChars = 0
	}
	// A string never has more runes than bytes, so anything that fits by byte
	// length fits by rune count too — no need to allocate a rune slice.
	if len(s) <= maxChars {
		return s
	}
	r := []rune(s)
	if len(r) <= maxChars {
		return s
	}
	return string(r[:maxChars]) + "\n[truncated]"
}
316  
// macOSAutomationGuidance builds the "## macOS Automation" section of the
// system prompt. It returns the empty string when not running on darwin or
// when none of the relevant tools appear in toolNames.
//
// Every bullet is gated on the presence of the tool it talks about, so the
// prompt never carries guidance for tools the session cannot use. Bullet
// order is fixed: accessibility, the computer fallback (which needs both
// computer and accessibility), browser, then wait_for.
func macOSAutomationGuidance(toolNames []string) string {
	if runtime.GOOS != "darwin" {
		return ""
	}

	// Membership set for the registered tool names.
	registered := make(map[string]bool, len(toolNames))
	for _, name := range toolNames {
		registered[name] = true
	}

	var lines []string
	if registered["accessibility"] {
		lines = append(lines,
			"- Prefer `accessibility` (AX API) over `computer` for UI interactions — faster, no screenshot needed.",
			"- After annotate or read_tree, click elements by ref (e.g. ref=\"e14\"). Only use coordinate clicks as a last resort.",
			"- Always include the app parameter. Use the exact name as shown in the Dock.",
			"- Ensure the target app is frontmost before typing. Use accessibility click on the target field first.",
		)
		// The fallback hint only makes sense when both tools are available.
		if registered["computer"] {
			lines = append(lines, "- Fall back to `computer` only when AX fails or the target is a canvas/web element.")
		}
	}
	if registered["browser"] {
		lines = append(lines, "- For interacting with web page elements, use `browser` (DOM-level access). Use accessibility only for native macOS UI.")
	}
	if registered["wait_for"] {
		lines = append(lines, "- Use `wait_for` to poll for UI state instead of bash sleep.")
	}

	if len(lines) == 0 {
		return ""
	}
	// Each bullet is newline-terminated, including the last one.
	return "## macOS Automation\n" + strings.Join(lines, "\n") + "\n"
}