/ internal / instructions / loader.go
loader.go
  1  package instructions
  2  
  3  import (
  4  	"bufio"
  5  	"fmt"
  6  	"os"
  7  	"path/filepath"
  8  	"regexp"
  9  	"sort"
 10  	"strings"
 11  	"time"
 12  	"unicode/utf8"
 13  
 14  	"github.com/Kocoro-lab/ShanClaw/internal/agents"
 15  )
 16  
 17  // maxCommandFileChars is the maximum character count for a single custom command file.
 18  const maxCommandFileChars = 8000
 19  
 20  // LoadInstructions reads all instruction files and returns combined content.
 21  // shannonDir is the global config directory (e.g. ~/.shannon).
 22  // projectDir is the project-level directory (e.g. .shannon relative to CWD).
 23  // maxTokens is an approximate budget (1 token ~ 4 chars).
 24  // Returns the combined instruction text, truncated if over budget.
 25  func LoadInstructions(shannonDir string, projectDir string, maxTokens int) (string, error) {
 26  	type source struct {
 27  		path     string
 28  		priority int // higher = higher priority
 29  	}
 30  
 31  	var sources []source
 32  	priority := 0
 33  
 34  	// 1. Global instructions
 35  	if shannonDir != "" {
 36  		sources = append(sources, source{filepath.Join(shannonDir, "instructions.md"), priority})
 37  		priority++
 38  
 39  		// 2. Global rules (sorted alphabetically)
 40  		ruleFiles := sortedMDFiles(filepath.Join(shannonDir, "rules"))
 41  		for _, rf := range ruleFiles {
 42  			sources = append(sources, source{rf, priority})
 43  			priority++
 44  		}
 45  	}
 46  
 47  	// 3. Project instructions
 48  	if projectDir != "" {
 49  		sources = append(sources, source{filepath.Join(projectDir, "instructions.md"), priority})
 50  		priority++
 51  
 52  		// 4. Project rules
 53  		ruleFiles := sortedMDFiles(filepath.Join(projectDir, "rules"))
 54  		for _, rf := range ruleFiles {
 55  			sources = append(sources, source{rf, priority})
 56  			priority++
 57  		}
 58  
 59  		// 5. Project local
 60  		sources = append(sources, source{filepath.Join(projectDir, "instructions.local.md"), priority})
 61  		priority++
 62  	}
 63  
 64  	// Load file contents in order, tracking lines for deduplication.
 65  	// Lines from higher-priority files take precedence.
 66  	type fileContent struct {
 67  		path     string
 68  		lines    []string
 69  		priority int
 70  	}
 71  
 72  	var loaded []fileContent
 73  	for _, src := range sources {
 74  		data, err := readMDFile(src.path)
 75  		if err != nil {
 76  			continue // file doesn't exist or isn't valid — skip
 77  		}
 78  		lines := strings.Split(data, "\n")
 79  		loaded = append(loaded, fileContent{path: src.path, lines: lines, priority: src.priority})
 80  	}
 81  
 82  	// Deduplicate: track which non-empty, non-whitespace lines we've seen.
 83  	// Process from highest priority to lowest. Keep only the highest-priority
 84  	// occurrence of each line.
 85  	seenLines := make(map[string]struct{})
 86  
 87  	// First pass: collect all lines from highest priority, marking them as seen.
 88  	// We process in reverse order (highest priority first) to build the seen set.
 89  	for i := len(loaded) - 1; i >= 0; i-- {
 90  		fc := &loaded[i]
 91  		deduped := make([]string, 0, len(fc.lines))
 92  		for _, line := range fc.lines {
 93  			trimmed := strings.TrimSpace(line)
 94  			if trimmed == "" {
 95  				deduped = append(deduped, line)
 96  				continue
 97  			}
 98  			if _, exists := seenLines[trimmed]; !exists {
 99  				seenLines[trimmed] = struct{}{}
100  				deduped = append(deduped, line)
101  			}
102  		}
103  		fc.lines = deduped
104  	}
105  
106  	// Build output in load order (lowest priority first).
107  	//
108  	// Provenance markers (<!-- from: /path --> comments) are intentionally
109  	// NOT emitted here. The loaded bundle rides in the cached prompt prefix
110  	// (see internal/prompt/builder.go buildStableContext), and every byte
111  	// here is replicated into every cached session. Filesystem paths are
112  	// pure overhead for the model. If provenance debugging is needed, add
113  	// explicit debug instrumentation at the loader rather than embedding
114  	// path comments in the prompt itself.
115  	maxChars := maxTokens * 4
116  	var parts []string
117  	for _, fc := range loaded {
118  		content := strings.Join(fc.lines, "\n")
119  		content = strings.TrimSpace(content)
120  		if content == "" {
121  			continue
122  		}
123  		parts = append(parts, content)
124  	}
125  
126  	result := strings.Join(parts, "\n\n")
127  	if len(result) > maxChars {
128  		result = result[:maxChars]
129  		result += "\n[Instructions truncated — reduce content in lower-priority files]"
130  	}
131  
132  	return result, nil
133  }
134  
135  // LoadMemory reads the MEMORY.md file from shannonDir/memory/MEMORY.md.
136  // Returns the first maxLines lines of the file.
137  // If the file doesn't exist, returns an empty string (not an error).
138  func LoadMemory(shannonDir string, maxLines int) (string, error) {
139  	if shannonDir == "" {
140  		return "", nil
141  	}
142  	return LoadMemoryFrom(filepath.Join(shannonDir, "memory"), maxLines)
143  }
144  
145  // LoadMemoryFrom reads MEMORY.md from the given directory.
146  // Returns the first maxLines lines of the file.
147  // If the file doesn't exist, returns an empty string (not an error).
148  // Markdown links to .md files in the same directory are auto-expanded inline
149  // so the LLM sees the full content without needing extra file_read calls.
150  func LoadMemoryFrom(dir string, maxLines int) (string, error) {
151  	if dir == "" {
152  		return "", nil
153  	}
154  	path := filepath.Join(dir, "MEMORY.md")
155  	f, err := os.Open(path)
156  	if err != nil {
157  		if os.IsNotExist(err) {
158  			return "", nil
159  		}
160  		return "", err
161  	}
162  	defer f.Close()
163  
164  	var lines []string
165  	scanner := bufio.NewScanner(f)
166  	for scanner.Scan() {
167  		if len(lines) >= maxLines {
168  			break
169  		}
170  		line := scanner.Text()
171  
172  		// Check for markdown links to local .md files and expand them inline.
173  		// Pattern: [text](filename.md) where filename.md is in the same dir.
174  		if ref := extractLocalMDLink(line); ref != "" {
175  			refPath := filepath.Join(dir, ref)
176  			if data, readErr := os.ReadFile(refPath); readErr == nil && utf8.Valid(data) {
177  				// Replace the pointer line with the file's content
178  				refLines := strings.Split(strings.TrimSpace(string(data)), "\n")
179  				for _, rl := range refLines {
180  					if len(lines) >= maxLines {
181  						break
182  					}
183  					lines = append(lines, rl)
184  				}
185  				continue
186  			}
187  		}
188  
189  		lines = append(lines, line)
190  	}
191  	if err := scanner.Err(); err != nil {
192  		return "", err
193  	}
194  
195  	result := strings.Join(lines, "\n")
196  	return annotateStaleness(result, time.Now()), nil
197  }
198  
// memoryDateRe matches heading lines with dates in parentheses.
// Handles both # and ## levels: "## Auto-persisted (2025-01-15)" and
// "# Auto-persisted Learnings (2025-01-15 14:30)".
// Group 1 is the heading up to and including the closing parenthesis;
// group 2 is the YYYY-MM-DD date.
var memoryDateRe = regexp.MustCompile(`(?m)^(#{1,2} .+\((\d{4}-\d{2}-\d{2})[^)]*\))`)

// annotateStaleness appends "[today]", "[yesterday]", or "[N days ago]" to
// memory headings that contain dates. Helps the model reason about memory
// freshness without mental date math.
//
// The age is the difference in calendar days between the heading's date and
// now's date in now's own location, so the label does not depend on the time
// of day or on the offset between that location and UTC. Headings whose date
// cannot be parsed, or lies after now's date, are left unchanged.
func annotateStaleness(content string, now time.Time) string {
	// Represent both dates as UTC midnights of their calendar day. Their
	// difference is then an exact multiple of 24h, unaffected by DST.
	y, m, d := now.Date()
	today := time.Date(y, m, d, 0, 0, 0, 0, time.UTC)

	return memoryDateRe.ReplaceAllStringFunc(content, func(match string) string {
		sub := memoryDateRe.FindStringSubmatch(match)
		if len(sub) < 3 {
			return match
		}
		day, err := time.Parse("2006-01-02", sub[2])
		if err != nil {
			return match
		}
		days := int(today.Sub(day) / (24 * time.Hour))
		switch {
		case days < 0:
			// Future-dated heading: "N days ago" would be wrong, so say nothing.
			return match
		case days == 0:
			return match + " [today]"
		case days == 1:
			return match + " [yesterday]"
		default:
			return match + fmt.Sprintf(" [%d days ago]", days)
		}
	})
}
226  
// extractLocalMDLink pulls the target out of the first markdown link in line
// and returns it when it names a local .md file; otherwise it returns "".
//
// Only the first "](" in the line is examined; the target is the text between
// it and the next ")". The target is accepted only if it ends in ".md",
// contains no "/", no "\" and no "..", and is not "MEMORY.md" itself (so the
// memory index is never expanded into itself).
func extractLocalMDLink(line string) string {
	open := strings.Index(line, "](")
	if open < 0 {
		return ""
	}
	target := line[open+2:]

	closeAt := strings.IndexByte(target, ')')
	if closeAt < 0 {
		return ""
	}
	target = target[:closeAt]

	switch {
	case !strings.HasSuffix(target, ".md"):
		// Not a markdown file.
		return ""
	case strings.ContainsAny(target, `/\`), strings.Contains(target, ".."):
		// Not local to the directory (path separators or parent references).
		return ""
	case target == "MEMORY.md":
		// Never expand the index file into itself.
		return ""
	}
	return target
}
256  
257  // LoadCustomCommands scans for .md files in command directories.
258  // Returns a map of command name -> file content.
259  // Project commands override global commands with the same name.
260  // Built-in command names cannot be overridden and are skipped with a warning to stderr.
261  func LoadCustomCommands(shannonDir string, projectDir string) (map[string]string, error) {
262  	commands := make(map[string]string)
263  
264  	// Load global commands first
265  	if shannonDir != "" {
266  		loadCommandDir(filepath.Join(shannonDir, "commands"), commands)
267  	}
268  
269  	// Load project commands (overrides global)
270  	if projectDir != "" {
271  		loadCommandDir(filepath.Join(projectDir, "commands"), commands)
272  	}
273  
274  	return commands, nil
275  }
276  
277  // loadCommandDir scans a directory for .md files and adds them to the commands map.
278  func loadCommandDir(dir string, commands map[string]string) {
279  	files := sortedMDFiles(dir)
280  	for _, path := range files {
281  		name := strings.TrimSuffix(filepath.Base(path), ".md")
282  		if agents.BuiltinCommands[name] {
283  			fmt.Fprintf(os.Stderr, "warning: custom command %q skipped — conflicts with built-in command\n", name)
284  			continue
285  		}
286  		data, err := readMDFile(path)
287  		if err != nil {
288  			continue
289  		}
290  		if len(data) > maxCommandFileChars {
291  			data = data[:maxCommandFileChars]
292  		}
293  		commands[name] = data
294  	}
295  }
296  
// sortedMDFiles returns the paths of all entries in dir whose name ends in
// ".md", sorted alphabetically. Each path is dir joined with the entry name.
// Returns nil if the directory doesn't exist, can't be read, or has no match.
//
// The directory is listed directly rather than through filepath.Glob, so
// characters such as "[", "*" or "?" in dir itself are taken literally
// instead of being interpreted as pattern syntax.
func sortedMDFiles(dir string) []string {
	// An empty dir means the current directory, as it did for the pattern
	// filepath.Join("", "*.md").
	listDir := dir
	if listDir == "" {
		listDir = "."
	}
	entries, err := os.ReadDir(listDir)
	if err != nil {
		return nil
	}

	var matches []string
	for _, entry := range entries {
		if strings.HasSuffix(entry.Name(), ".md") {
			matches = append(matches, filepath.Join(dir, entry.Name()))
		}
	}
	if len(matches) == 0 {
		return nil
	}
	sort.Strings(matches)
	return matches
}
308  
// readMDFile returns the contents of the file at path as a string.
// It fails without touching the filesystem when path does not have the ".md"
// extension, returns the underlying read error when the file cannot be read,
// and fails when the bytes read are not valid UTF-8.
func readMDFile(path string) (string, error) {
	if ext := filepath.Ext(path); ext != ".md" {
		return "", fmt.Errorf("not a .md file: %s", path)
	}

	raw, err := os.ReadFile(path)
	switch {
	case err != nil:
		return "", err
	case !utf8.Valid(raw):
		return "", fmt.Errorf("file is not valid UTF-8: %s", path)
	}
	return string(raw), nil
}