builder.go
1 package prompt 2 3 import ( 4 "fmt" 5 "runtime" 6 "strings" 7 "time" 8 9 "github.com/Kocoro-lab/ShanClaw/internal/skills" 10 ) 11 12 // Layer character budgets. 13 const ( 14 maxMemoryChars = 2000 15 maxInstructionsChars = 16000 16 ) 17 18 // DeferredToolSummary is a lightweight name+description pair for deferred tool listings. 19 // Mirrors agent.ToolSummary but avoids importing the agent package from prompt. 20 type DeferredToolSummary struct { 21 Name string 22 Description string 23 } 24 25 // PromptOptions configures the system prompt assembly. 26 type PromptOptions struct { 27 BasePrompt string // persona + core operational rules 28 Memory string // from LoadMemory (~500 tokens budget) — rendered in VolatileContext 29 Instructions string // from LoadInstructions (~4000 tokens budget) — rendered in StableContext so it joins the cacheable prefix 30 ToolNames []string // from ToolRegistry.SortedNames(), deterministic 31 ServerTools []string // server tool names (optional) 32 MCPContext string // context from MCP servers (auth info, usage hints) 33 Skills []*skills.Skill 34 CWD string // current working directory 35 SessionInfo string // optional session context (currently unused by agent loop) 36 MemoryDir string // directory containing MEMORY.md for agent memory writes 37 // StickyContext holds session-scoped facts injected verbatim into StableContext. 38 // Never truncated or compacted. Use for key transactional data (IDs, amounts, names) 39 // that must survive context compaction. Populated by the daemon runner with session 40 // source/channel/task metadata, or by callers needing persistent session facts. 41 StickyContext string 42 // DeferredTools lists tools available via tool_search (deferred mode only). 43 // Rendered in the static system prompt. Empty when not in deferred mode. 44 DeferredTools []DeferredToolSummary 45 // ModelID is the model identifier (e.g., "claude-sonnet-4-20250514"). 46 // Injected into volatile context so the model knows its own identity. 47 ModelID string 48 // ContextWindow is the model's context window size in tokens. 49 // Injected into volatile context when > 0. 50 ContextWindow int 51 // OutputFormat controls formatting guidance: "markdown" (default, GFM) or 52 // "plain" (for cloud-distributed sessions where Shannon Cloud handles 53 // final channel rendering). Empty defaults to "markdown". 54 OutputFormat string 55 } 56 57 // PromptParts separates the system prompt into cacheable and volatile sections. 58 // The gateway caches System as a single block. StableContext and VolatileContext 59 // are injected into the user message with a <!-- cache_break --> separator. 60 // 61 // Layer semantics: 62 // - System : persona, core rules, tool names, skills — gateway-cached. 63 // - StableContext : shared org-wide instructions (instructions.md + rules/*.md + 64 // project overrides) and sticky session facts. Changes only 65 // across sessions or on file edits. Sits before the 66 // cache_break marker in the user message so providers that 67 // reuse the pre-break prefix can hit on it. 68 // - VolatileContext: memory (mutated by memory_append mid-session), date/time, 69 // CWD, MCP server context, output format guidance. Sits 70 // after the cache_break marker and is re-sent each turn. 71 type PromptParts struct { 72 System string // static: persona + rules + guidance + tool names + skills (cached by gateway) 73 StableContext string // per-session cacheable prefix: shared instructions + sticky facts (before cache_break) 74 VolatileContext string // changes per-turn: memory, date/time, CWD, MCP, format guidance (after cache_break) 75 } 76 77 // BuildSystemPrompt assembles prompt parts from layers. 78 // System contains only content that is stable across turns. 79 // Shared instructions and sticky facts go to StableContext (cached prefix). 80 // Volatile content (memory, date/time, CWD, MCP) goes to VolatileContext. 81 // 82 // Note: an attempt to move VolatileContext into System (after a 83 // `<!-- volatile -->` marker) was reverted — it caused tools cache to break 84 // every minute because the system_volatile bytes sit BEFORE the tools 85 // cache_control. Baseline placement (volatile in user_1 after cache_break) is 86 // actually optimal: it only pollutes the rolling marker cache, leaving system 87 // + tools + user_1.stable caches intact. 88 func BuildSystemPrompt(opts PromptOptions) PromptParts { 89 system := buildStaticSystem(opts) 90 stable := buildStableContext(opts) 91 volatile := buildVolatileContext(opts) 92 return PromptParts{ 93 System: system, 94 StableContext: stable, 95 VolatileContext: volatile, 96 } 97 } 98 99 // buildStaticSystem assembles content that never changes between turns in a session. 100 func buildStaticSystem(opts PromptOptions) string { 101 var sb strings.Builder 102 103 // 1. Base prompt (persona + core rules — unlimited) 104 sb.WriteString(opts.BasePrompt) 105 106 // Language policy. Byte-stable across all sessions and users so it joins 107 // the cacheable system prefix. Pairs with the shorter per-turn reminder in 108 // VolatileContext which re-anchors the rule against long-session drift. 109 sb.WriteString("\n\n## Language\n") 110 sb.WriteString("Match the user's language on first contact and stay consistent for the rest of the session. " + 111 "If the user writes primarily in Chinese, respond in Chinese; if in English, respond in English; " + 112 "follow the same rule for any other language. Only switch response language when the user explicitly asks " + 113 "(e.g. \"please reply in English\"). Mixed-language user input — such as one English technical term inside a " + 114 "Chinese sentence — is NOT a language-switch signal; continue in the established language. " + 115 "Code identifiers, file paths, CLI commands, and technical terms (API names, library names, error messages) " + 116 "remain in their original form regardless of response language. " + 117 "Maintain full orthographic correctness — all accents, diacritics, and special characters.") 118 119 // 2. Available Tools (stable once session starts) 120 sb.WriteString("\n\n## Available Tools\n") 121 if len(opts.ToolNames) > 0 { 122 sb.WriteString("You have these tools: ") 123 sb.WriteString(strings.Join(opts.ToolNames, ", ")) 124 sb.WriteString(".") 125 } 126 if len(opts.ServerTools) > 0 { 127 if len(opts.ToolNames) > 0 { 128 sb.WriteString("\n") 129 } 130 sb.WriteString("You also have server-side tools: ") 131 sb.WriteString(strings.Join(opts.ServerTools, ", ")) 132 sb.WriteString(".") 133 } 134 135 // Parallel tool-use nudge: agent loops that fire N tool calls across N 136 // iterations grow msgs past Anthropic's ~20-block auto-lookback window, 137 // causing CHR decay in long sessions. Batching independent calls into 138 // ONE response collapses N iterations → 1, keeping the rolling marker 139 // reachable. Only add when tools are actually registered — tool-less 140 // agents would just pay extra cached-prefix tokens. 141 if len(opts.ToolNames) > 0 || len(opts.ServerTools) > 0 { 142 sb.WriteString("\n\nWhen you need independent pieces of information " + 143 "(read multiple files, check several conditions, fetch data from different sources), " + 144 "prefer calling ALL the tools in a SINGLE response with multiple parallel tool_use blocks " + 145 "rather than across sequential turns. This amortizes prompt-cache cost and reduces latency.\n" + 146 "Example — INEFFICIENT (3 turns):\n" + 147 " turn 1: file_read A\n" + 148 " turn 2: file_read B\n" + 149 " turn 3: file_read C\n" + 150 "Example — EFFICIENT (1 turn, 3 parallel tool_use blocks in one response):\n" + 151 " turn 1: file_read A + file_read B + file_read C\n" + 152 "Only sequence when later calls genuinely depend on earlier results.") 153 } 154 155 // Skills are listed in the user message (not system prompt) to preserve 156 // cache prefix stability. See buildSkillListing() in loop.go. 157 158 // 3b. Deferred Tools (only in deferred mode) — name + truncated description. 159 // Model calls tool_search to load full schemas on demand. 160 if len(opts.DeferredTools) > 0 { 161 sb.WriteString("\n\n## Deferred Tools\n") 162 sb.WriteString("Load via `tool_search` when needed, then immediately call the loaded tool.\n") 163 for _, dt := range opts.DeferredTools { 164 desc := dt.Description 165 if len(desc) > 60 { 166 desc = desc[:57] + "..." 167 } 168 fmt.Fprintf(&sb, "- %s: %s\n", dt.Name, desc) 169 } 170 } 171 172 // 4. macOS automation guidance (only on darwin with relevant tools) 173 if guidance := macOSAutomationGuidance(opts.ToolNames); guidance != "" { 174 sb.WriteString("\n\n") 175 sb.WriteString(guidance) 176 } 177 178 // 5. Memory Persistence guidance (stable — depends only on memoryDir presence) 179 if opts.MemoryDir != "" { 180 sb.WriteString("\n\n## Memory Persistence\n") 181 sb.WriteString("Your current memory is shown in the context section below. When you discover something worth remembering across future conversations, use the `memory_append` tool to add new entries.\n") 182 sb.WriteString("IMPORTANT: NEVER use file_write or file_edit on MEMORY.md — they race under concurrent sessions. The memory_append tool is flock-protected and safe.\n") 183 sb.WriteString("Good candidates for memory:\n") 184 sb.WriteString("- Decisions the user made (technical, design, or preferences)\n") 185 sb.WriteString("- User corrections about how they want to work\n") 186 sb.WriteString("- Important facts about projects, people, or systems\n") 187 sb.WriteString("- Patterns, gotchas, or insights you discovered together\n") 188 sb.WriteString("- Configuration or reference information that was hard to find\n\n") 189 sb.WriteString("Keep entries as short one-line bullets. Do NOT save ephemeral task status, code snippets, or things already documented in project files. Your context is automatically compacted in long sessions — anything not written to memory may be lost.") 190 } 191 192 return sb.String() 193 } 194 195 // buildStableContext assembles the cacheable per-session prefix: shared 196 // instructions followed by sticky session facts. Placed before the 197 // <!-- cache_break --> marker in the user message so providers that reuse the 198 // pre-break prefix have a chance to cache-hit on it within a session. 199 // 200 // Ordering: instructions come first because they're the more stable of the 201 // two — file-backed and rarely edited — while sticky facts vary per session 202 // source. Putting the stabler content first gives the gateway/provider more 203 // opportunity to extend a cached prefix. Whether that actually produces a 204 // cross-session cache hit depends on upstream gateway/provider behavior and 205 // on the rest of the prompt state matching, not just the instructions text. 206 // 207 // Truncation: shared instructions are bounded by maxInstructionsChars to keep 208 // the cached prefix within a predictable budget. Oversized content is trimmed 209 // with a [truncated] marker telling the author to reduce file content. 210 func buildStableContext(opts PromptOptions) string { 211 var sb strings.Builder 212 213 if inst := strings.TrimSpace(opts.Instructions); inst != "" { 214 sb.WriteString("## Instructions\n") 215 sb.WriteString(truncate(inst, maxInstructionsChars)) 216 } 217 218 if sticky := strings.TrimSpace(opts.StickyContext); sticky != "" { 219 if sb.Len() > 0 { 220 sb.WriteString("\n\n") 221 } 222 sb.WriteString("## Session Facts\n") 223 sb.WriteString(sticky) 224 } 225 226 // Guarantee a non-empty stable prefix so the gateway attaches a third 227 // cache_control breakpoint (on the user message stable block). When this 228 // is empty the gateway's Anthropic provider falls through its 229 // empty-text-block guard and skips the breakpoint entirely, leaving the 230 // user message uncached. The literal text is stable across all sessions 231 // (no time, no IDs) so the extra bytes go into a shareable cached prefix. 232 if sb.Len() == 0 { 233 sb.WriteString("## Session\nActive agent context.") 234 } 235 236 return sb.String() 237 } 238 239 // buildVolatileContext assembles content that changes between turns. 240 // Placed after the <!-- cache_break --> marker in the user message. 241 func buildVolatileContext(opts PromptOptions) string { 242 var sb strings.Builder 243 244 // Date/time + CWD + model identity + session info 245 sb.WriteString("## Context\n") 246 sb.WriteString("Current date: " + time.Now().Format("2006-01-02 15:04 MST")) 247 if opts.CWD != "" { 248 sb.WriteString("\nWorking directory: " + opts.CWD) 249 } 250 if opts.ModelID != "" { 251 sb.WriteString("\nModel: " + opts.ModelID) 252 } 253 if opts.ContextWindow > 0 { 254 sb.WriteString(fmt.Sprintf("\nContext window: %d tokens", opts.ContextWindow)) 255 } 256 if opts.SessionInfo != "" { 257 sb.WriteString("\n" + opts.SessionInfo) 258 } 259 260 // Output formatting guidance 261 sb.WriteString("\n\n## Output Format\n") 262 sb.WriteString(formatGuidance(opts.OutputFormat)) 263 264 // Per-turn language reminder. Short reinforcement of the full policy in the 265 // System section. Byte-stable (same text every turn) so it does not fragment 266 // any per-turn cache, but positioning near the user message anchors against 267 // drift when long sessions accumulate English tool output. 268 // 269 // On turn 0 "the language already established" is vacuous — nothing has been 270 // established yet. The static System section's "Match the user's language on 271 // first contact" rule handles that case; this reminder takes over from turn 1 272 // onward when there's actually an established language to stay consistent with. 273 sb.WriteString("\n\n## Language\n") 274 sb.WriteString("Respond in the language already established with the user in this session. " + 275 "If the user has not asked for a different language, stay consistent — do not switch even when tool output, " + 276 "skill descriptions, or system messages arrive in a different language. Keep code and technical identifiers in their original form.") 277 278 // Memory — stays volatile: memory_append can mutate MEMORY.md during a 279 // turn, so the block must be re-read and re-sent each Run(). Instructions 280 // live in StableContext (cacheable prefix), not here. 281 if mem := strings.TrimSpace(opts.Memory); mem != "" { 282 sb.WriteString("\n\n## Memory\n") 283 sb.WriteString(truncate(mem, maxMemoryChars)) 284 } 285 286 // MCP server context 287 if mcp := strings.TrimSpace(opts.MCPContext); mcp != "" { 288 sb.WriteString("\n\n## MCP Server Context\n") 289 sb.WriteString(mcp) 290 } 291 292 return sb.String() 293 } 294 295 // formatGuidance returns output formatting instructions based on the profile. 296 func formatGuidance(format string) string { 297 switch format { 298 case "plain": 299 return "Format responses as plain text. Use short paragraphs and simple bullet points. " + 300 "Avoid markdown tables, fenced code blocks, headers, bold/italic, and other rich formatting. " + 301 "Use indentation or blank lines for structure. Keep lines short and readable." 302 default: // "markdown" or empty 303 return "Format text responses using GitHub-flavored markdown (GFM): " + 304 "use headers, fenced code blocks with language tags, lists, bold/italic, and tables where appropriate." 305 } 306 } 307 308 // truncate limits s to maxChars, appending [truncated] if trimmed. 309 func truncate(s string, maxChars int) string { 310 r := []rune(s) 311 if len(r) <= maxChars { 312 return s 313 } 314 return string(r[:maxChars]) + "\n[truncated]" 315 } 316 317 // macOSAutomationGuidance returns workflow guidance for macOS automation tools, 318 // or empty string if not on darwin or no relevant tools are registered. 319 // Each bullet is conditional on the actual tool presence to avoid emitting 320 // guidance for tools the session won't use. 321 func macOSAutomationGuidance(toolNames []string) string { 322 if runtime.GOOS != "darwin" { 323 return "" 324 } 325 has := func(name string) bool { 326 for _, n := range toolNames { 327 if n == name { 328 return true 329 } 330 } 331 return false 332 } 333 var bullets strings.Builder 334 if has("accessibility") { 335 bullets.WriteString("- Prefer `accessibility` (AX API) over `computer` for UI interactions — faster, no screenshot needed.\n") 336 bullets.WriteString("- After annotate or read_tree, click elements by ref (e.g. ref=\"e14\"). Only use coordinate clicks as a last resort.\n") 337 bullets.WriteString("- Always include the app parameter. Use the exact name as shown in the Dock.\n") 338 bullets.WriteString("- Ensure the target app is frontmost before typing. Use accessibility click on the target field first.\n") 339 } 340 if has("computer") && has("accessibility") { 341 bullets.WriteString("- Fall back to `computer` only when AX fails or the target is a canvas/web element.\n") 342 } 343 if has("browser") { 344 bullets.WriteString("- For interacting with web page elements, use `browser` (DOM-level access). Use accessibility only for native macOS UI.\n") 345 } 346 if has("wait_for") { 347 bullets.WriteString("- Use `wait_for` to poll for UI state instead of bash sleep.\n") 348 } 349 if bullets.Len() == 0 { 350 return "" 351 } 352 return "## macOS Automation\n" + bullets.String() 353 }