loader.go
1 package instructions 2 3 import ( 4 "bufio" 5 "fmt" 6 "os" 7 "path/filepath" 8 "regexp" 9 "sort" 10 "strings" 11 "time" 12 "unicode/utf8" 13 14 "github.com/Kocoro-lab/ShanClaw/internal/agents" 15 ) 16 17 // maxCommandFileChars is the maximum character count for a single custom command file. 18 const maxCommandFileChars = 8000 19 20 // LoadInstructions reads all instruction files and returns combined content. 21 // shannonDir is the global config directory (e.g. ~/.shannon). 22 // projectDir is the project-level directory (e.g. .shannon relative to CWD). 23 // maxTokens is an approximate budget (1 token ~ 4 chars). 24 // Returns the combined instruction text, truncated if over budget. 25 func LoadInstructions(shannonDir string, projectDir string, maxTokens int) (string, error) { 26 type source struct { 27 path string 28 priority int // higher = higher priority 29 } 30 31 var sources []source 32 priority := 0 33 34 // 1. Global instructions 35 if shannonDir != "" { 36 sources = append(sources, source{filepath.Join(shannonDir, "instructions.md"), priority}) 37 priority++ 38 39 // 2. Global rules (sorted alphabetically) 40 ruleFiles := sortedMDFiles(filepath.Join(shannonDir, "rules")) 41 for _, rf := range ruleFiles { 42 sources = append(sources, source{rf, priority}) 43 priority++ 44 } 45 } 46 47 // 3. Project instructions 48 if projectDir != "" { 49 sources = append(sources, source{filepath.Join(projectDir, "instructions.md"), priority}) 50 priority++ 51 52 // 4. Project rules 53 ruleFiles := sortedMDFiles(filepath.Join(projectDir, "rules")) 54 for _, rf := range ruleFiles { 55 sources = append(sources, source{rf, priority}) 56 priority++ 57 } 58 59 // 5. Project local 60 sources = append(sources, source{filepath.Join(projectDir, "instructions.local.md"), priority}) 61 priority++ 62 } 63 64 // Load file contents in order, tracking lines for deduplication. 65 // Lines from higher-priority files take precedence. 66 type fileContent struct { 67 path string 68 lines []string 69 priority int 70 } 71 72 var loaded []fileContent 73 for _, src := range sources { 74 data, err := readMDFile(src.path) 75 if err != nil { 76 continue // file doesn't exist or isn't valid — skip 77 } 78 lines := strings.Split(data, "\n") 79 loaded = append(loaded, fileContent{path: src.path, lines: lines, priority: src.priority}) 80 } 81 82 // Deduplicate: track which non-empty, non-whitespace lines we've seen. 83 // Process from highest priority to lowest. Keep only the highest-priority 84 // occurrence of each line. 85 seenLines := make(map[string]struct{}) 86 87 // First pass: collect all lines from highest priority, marking them as seen. 88 // We process in reverse order (highest priority first) to build the seen set. 89 for i := len(loaded) - 1; i >= 0; i-- { 90 fc := &loaded[i] 91 deduped := make([]string, 0, len(fc.lines)) 92 for _, line := range fc.lines { 93 trimmed := strings.TrimSpace(line) 94 if trimmed == "" { 95 deduped = append(deduped, line) 96 continue 97 } 98 if _, exists := seenLines[trimmed]; !exists { 99 seenLines[trimmed] = struct{}{} 100 deduped = append(deduped, line) 101 } 102 } 103 fc.lines = deduped 104 } 105 106 // Build output in load order (lowest priority first). 107 // 108 // Provenance markers (<!-- from: /path --> comments) are intentionally 109 // NOT emitted here. The loaded bundle rides in the cached prompt prefix 110 // (see internal/prompt/builder.go buildStableContext), and every byte 111 // here is replicated into every cached session. Filesystem paths are 112 // pure overhead for the model. If provenance debugging is needed, add 113 // explicit debug instrumentation at the loader rather than embedding 114 // path comments in the prompt itself. 115 maxChars := maxTokens * 4 116 var parts []string 117 for _, fc := range loaded { 118 content := strings.Join(fc.lines, "\n") 119 content = strings.TrimSpace(content) 120 if content == "" { 121 continue 122 } 123 parts = append(parts, content) 124 } 125 126 result := strings.Join(parts, "\n\n") 127 if len(result) > maxChars { 128 result = result[:maxChars] 129 result += "\n[Instructions truncated — reduce content in lower-priority files]" 130 } 131 132 return result, nil 133 } 134 135 // LoadMemory reads the MEMORY.md file from shannonDir/memory/MEMORY.md. 136 // Returns the first maxLines lines of the file. 137 // If the file doesn't exist, returns an empty string (not an error). 138 func LoadMemory(shannonDir string, maxLines int) (string, error) { 139 if shannonDir == "" { 140 return "", nil 141 } 142 return LoadMemoryFrom(filepath.Join(shannonDir, "memory"), maxLines) 143 } 144 145 // LoadMemoryFrom reads MEMORY.md from the given directory. 146 // Returns the first maxLines lines of the file. 147 // If the file doesn't exist, returns an empty string (not an error). 148 // Markdown links to .md files in the same directory are auto-expanded inline 149 // so the LLM sees the full content without needing extra file_read calls. 150 func LoadMemoryFrom(dir string, maxLines int) (string, error) { 151 if dir == "" { 152 return "", nil 153 } 154 path := filepath.Join(dir, "MEMORY.md") 155 f, err := os.Open(path) 156 if err != nil { 157 if os.IsNotExist(err) { 158 return "", nil 159 } 160 return "", err 161 } 162 defer f.Close() 163 164 var lines []string 165 scanner := bufio.NewScanner(f) 166 for scanner.Scan() { 167 if len(lines) >= maxLines { 168 break 169 } 170 line := scanner.Text() 171 172 // Check for markdown links to local .md files and expand them inline. 173 // Pattern: [text](filename.md) where filename.md is in the same dir. 174 if ref := extractLocalMDLink(line); ref != "" { 175 refPath := filepath.Join(dir, ref) 176 if data, readErr := os.ReadFile(refPath); readErr == nil && utf8.Valid(data) { 177 // Replace the pointer line with the file's content 178 refLines := strings.Split(strings.TrimSpace(string(data)), "\n") 179 for _, rl := range refLines { 180 if len(lines) >= maxLines { 181 break 182 } 183 lines = append(lines, rl) 184 } 185 continue 186 } 187 } 188 189 lines = append(lines, line) 190 } 191 if err := scanner.Err(); err != nil { 192 return "", err 193 } 194 195 result := strings.Join(lines, "\n") 196 return annotateStaleness(result, time.Now()), nil 197 } 198 199 // memoryDateRe matches heading lines with dates in parentheses. 200 // Handles both # and ## levels: "## Auto-persisted (2025-01-15)" and 201 // "# Auto-persisted Learnings (2025-01-15 14:30)". 202 var memoryDateRe = regexp.MustCompile(`(?m)^(#{1,2} .+\((\d{4}-\d{2}-\d{2})[^)]*\))`) 203 204 // annotateStaleness appends "[N days ago]" to memory headings that contain dates. 205 // Helps the model reason about memory freshness without mental date math. 206 func annotateStaleness(content string, now time.Time) string { 207 return memoryDateRe.ReplaceAllStringFunc(content, func(match string) string { 208 sub := memoryDateRe.FindStringSubmatch(match) 209 if len(sub) < 3 { 210 return match 211 } 212 t, err := time.Parse("2006-01-02", sub[2]) 213 if err != nil { 214 return match 215 } 216 days := int(now.Sub(t).Hours() / 24) 217 if days == 0 { 218 return match + " [today]" 219 } 220 if days == 1 { 221 return match + " [yesterday]" 222 } 223 return match + fmt.Sprintf(" [%d days ago]", days) 224 }) 225 } 226 227 // extractLocalMDLink extracts a local .md filename from a markdown link in a line. 228 // Returns the filename if found, or empty string. 229 // Matches patterns like: [anything](filename.md) where filename doesn't contain / or .. 230 func extractLocalMDLink(line string) string { 231 // Look for ](filename.md) pattern 232 idx := strings.Index(line, "](") 233 if idx < 0 { 234 return "" 235 } 236 rest := line[idx+2:] 237 end := strings.Index(rest, ")") 238 if end < 0 { 239 return "" 240 } 241 ref := rest[:end] 242 243 // Must be a .md file, local (no slashes, no ..) 244 if !strings.HasSuffix(ref, ".md") { 245 return "" 246 } 247 if strings.Contains(ref, "/") || strings.Contains(ref, "\\") || strings.Contains(ref, "..") { 248 return "" 249 } 250 // Don't expand MEMORY.md itself (avoid infinite loop) 251 if ref == "MEMORY.md" { 252 return "" 253 } 254 return ref 255 } 256 257 // LoadCustomCommands scans for .md files in command directories. 258 // Returns a map of command name -> file content. 259 // Project commands override global commands with the same name. 260 // Built-in command names cannot be overridden and are skipped with a warning to stderr. 261 func LoadCustomCommands(shannonDir string, projectDir string) (map[string]string, error) { 262 commands := make(map[string]string) 263 264 // Load global commands first 265 if shannonDir != "" { 266 loadCommandDir(filepath.Join(shannonDir, "commands"), commands) 267 } 268 269 // Load project commands (overrides global) 270 if projectDir != "" { 271 loadCommandDir(filepath.Join(projectDir, "commands"), commands) 272 } 273 274 return commands, nil 275 } 276 277 // loadCommandDir scans a directory for .md files and adds them to the commands map. 278 func loadCommandDir(dir string, commands map[string]string) { 279 files := sortedMDFiles(dir) 280 for _, path := range files { 281 name := strings.TrimSuffix(filepath.Base(path), ".md") 282 if agents.BuiltinCommands[name] { 283 fmt.Fprintf(os.Stderr, "warning: custom command %q skipped — conflicts with built-in command\n", name) 284 continue 285 } 286 data, err := readMDFile(path) 287 if err != nil { 288 continue 289 } 290 if len(data) > maxCommandFileChars { 291 data = data[:maxCommandFileChars] 292 } 293 commands[name] = data 294 } 295 } 296 297 // sortedMDFiles returns all .md files in dir, sorted alphabetically. 298 // Returns nil if the directory doesn't exist. 299 func sortedMDFiles(dir string) []string { 300 pattern := filepath.Join(dir, "*.md") 301 matches, err := filepath.Glob(pattern) 302 if err != nil || len(matches) == 0 { 303 return nil 304 } 305 sort.Strings(matches) 306 return matches 307 } 308 309 // readMDFile reads a file if it exists, is a .md file, and contains valid UTF-8. 310 // Returns the file contents or an error. 311 func readMDFile(path string) (string, error) { 312 if filepath.Ext(path) != ".md" { 313 return "", fmt.Errorf("not a .md file: %s", path) 314 } 315 data, err := os.ReadFile(path) 316 if err != nil { 317 return "", err 318 } 319 if !utf8.Valid(data) { 320 return "", fmt.Errorf("file is not valid UTF-8: %s", path) 321 } 322 return string(data), nil 323 }