/ internal / skills / loader.go
loader.go
  1  package skills
  2  
  3  import (
  4  	"bytes"
  5  	"fmt"
  6  	"log"
  7  	"os"
  8  	"path/filepath"
  9  	"sort"
 10  	"strings"
 11  
 12  	"github.com/Kocoro-lab/ShanClaw/internal/skills/bundled"
 13  	"github.com/adrg/frontmatter"
 14  	"gopkg.in/yaml.v3"
 15  )
 16  
// SkillSource pairs a directory to scan for skills with the origin label
// recorded on every skill loaded from it.
type SkillSource struct {
	// Dir is the directory LoadSkills scans; each immediate subdirectory
	// that contains a SKILL.md file is treated as one skill.
	Dir    string
	// Source is the origin label (for example SourceBundled) that is passed
	// through to each skill loaded from Dir.
	Source string
}
 21  
// Origin labels used as SkillSource.Source values.
const (
	// SourceGlobal presumably labels the user's global skills directory; it
	// is not referenced in this part of the file — confirm against callers.
	SourceGlobal  = "global"
	// SourceBundled labels skills that come from the bundled skill set; it
	// is the label BundledSkillSource attaches to its result.
	SourceBundled = "bundled"
)
 26  
 27  func BundledSkillSource(shannonDir string) (SkillSource, error) {
 28  	dir, err := bundled.ExtractBundledSkills(shannonDir)
 29  	if err != nil {
 30  		return SkillSource{}, err
 31  	}
 32  	return SkillSource{Dir: dir, Source: SourceBundled}, nil
 33  }
 34  
// skillFrontmatter is the YAML frontmatter block at the top of a SKILL.md
// file, as decoded by loadSkillMD. Name and Description are required there;
// every other field is optional.
type skillFrontmatter struct {
	// Name is required. loadSkillMD rejects an empty value and additionally
	// runs it through validateFrontmatterName.
	Name string `yaml:"name"`
	// Slug is not part of the Agent Skills spec nor the openclaw/clawhub
	// spec (slug is derived from the directory basename). We accept the
	// YAML key so authors who historically set it don't blow up
	// unmarshal, but we intentionally ignore its value — Skill.Slug is
	// always the on-disk directory name.
	Slug          string         `yaml:"slug,omitempty"`
	// Description is required; loadSkillMD rejects an empty value.
	Description   string         `yaml:"description"`
	License       string         `yaml:"license"`
	Compatibility string         `yaml:"compatibility"`
	// Metadata is intentionally `map[string]any` so nested YAML values
	// (ClawHub skills embed a structured `clawdbot` object with emoji,
	// required bins, etc.) round-trip through loadSkillMD without blowing
	// up unmarshal. A flat `map[string]string` would reject any non-string
	// value and surface as ErrInvalidSkillPayload / HTTP 422 "malformed"
	// — see the regression test in marketplace_test.go.
	Metadata     map[string]any `yaml:"metadata,omitempty"`
	// AllowedTools is a single whitespace-separated string of tool names;
	// loadSkillMD splits it with strings.Fields.
	AllowedTools string         `yaml:"allowed-tools,omitempty"`
	// StickyInstructions opts the skill into post-activation / post-drift
	// <system-reminder> reinjection. Opt-in only. See Skill.StickyInstructions.
	// omitempty so skills that never set it don't gain a noisy
	// `sticky-instructions: false` line on re-save.
	StickyInstructions bool `yaml:"sticky-instructions,omitempty"`
	// Hidden, when true, excludes the skill from the default GET /skills
	// listing so frontends don't show it to users. Display-only flag — the
	// skill still loads, still participates in discovery, still invokable
	// via use_skill. Intended for policy/internal skills like kocoro that
	// the LLM must keep using but users should not see.
	Hidden bool `yaml:"hidden,omitempty"`
	// StickySnippet, when set, overrides the auto-extracted snippet so
	// authors can pin the precise guidance to re-inject. Essential for
	// skills where the first paragraph is boilerplate ("You help users ...")
	// but the actual policy sits further down. When unset, loadSkillMD falls
	// back to the imperative-paragraph heuristic, then to the first
	// non-heading paragraph, and finally to Description.
	StickySnippet string `yaml:"sticky-snippet,omitempty"`
}
 72  
 73  
 74  func LoadSkills(sources ...SkillSource) ([]*Skill, error) {
 75  	seen := make(map[string]bool)
 76  	var result []*Skill
 77  
 78  	for _, src := range sources {
 79  		if _, err := os.Stat(src.Dir); os.IsNotExist(err) {
 80  			continue
 81  		}
 82  		warnLegacyYAML(src.Dir)
 83  
 84  		entries, err := os.ReadDir(src.Dir)
 85  		if err != nil {
 86  			continue
 87  		}
 88  		names := make([]string, 0, len(entries))
 89  		for _, e := range entries {
 90  			if e.IsDir() {
 91  				names = append(names, e.Name())
 92  			}
 93  		}
 94  		sort.Strings(names)
 95  
 96  		for _, name := range names {
 97  			if seen[name] {
 98  				continue
 99  			}
100  			skillDir := filepath.Join(src.Dir, name)
101  			skillFile := filepath.Join(skillDir, "SKILL.md")
102  			if _, err := os.Stat(skillFile); os.IsNotExist(err) {
103  				continue
104  			}
105  			s, err := loadSkillMD(skillFile, name, src.Source)
106  			if err != nil {
107  				// Fail open per skill: a malformed SKILL.md must not block
108  				// every other skill in the same (or any other) source. Log a
109  				// warning that names the file path so the user can find and
110  				// fix it, then move on without marking `seen[name]` — that
111  				// way a valid lower-priority version of the same skill name
112  				// (e.g. bundled vs broken global) can still take over.
113  				log.Printf("WARNING: skipping skill %q (%s): %v", name, skillFile, err)
114  				continue
115  			}
116  			s.Dir = skillDir
117  			s.InstallSource, s.MarketplaceSlug = installProvenanceForSkill(src.Source, skillDir)
118  			seen[name] = true
119  			result = append(result, s)
120  		}
121  	}
122  	return result, nil
123  }
124  
125  func loadSkillMD(path, dirName, source string) (*Skill, error) {
126  	data, err := os.ReadFile(path)
127  	if err != nil {
128  		return nil, err
129  	}
130  	var fm skillFrontmatter
131  	body, err := frontmatter.Parse(bytes.NewReader(data), &fm, frontmatter.NewFormat("---", "---", yaml.Unmarshal))
132  	if err != nil {
133  		return nil, fmt.Errorf("parse frontmatter: %w", err)
134  	}
135  	if fm.Name == "" {
136  		return nil, fmt.Errorf("skill name is required in frontmatter")
137  	}
138  	// Slug is the on-disk / URL-safe identifier, always derived from the
139  	// directory name (per openclaw/clawhub docs/skill-format.md). We don't
140  	// require frontmatter.name to equal dirName: some ClawHub authors ship
141  	// skills where `name` is a display label (e.g. `name: xiaohongshu`) and
142  	// the marketplace slug is different (e.g. `xiaohongshu-mcp-skills`).
143  	// Both names are valid and must coexist so the skill still installs.
144  	if err := ValidateSkillName(dirName); err != nil {
145  		return nil, fmt.Errorf("directory name %q is not a valid slug: %w", dirName, err)
146  	}
147  	// Bound frontmatter.name so it can't smuggle unusual characters into
148  	// prompts shown to the LLM (skill catalog, use_skill results, sticky
149  	// reinjection snippets). Formatting-sensitive: reject control chars
150  	// and any newline. Length cap prevents oversized catalog entries.
151  	if err := validateFrontmatterName(fm.Name); err != nil {
152  		return nil, err
153  	}
154  	// If an author set `slug:` in frontmatter and it disagrees with the
155  	// directory, warn so the mismatch surfaces during development. We
156  	// still trust the directory name — it's the URL/marketplace truth.
157  	if fm.Slug != "" && fm.Slug != dirName {
158  		log.Printf("WARNING: skill %s/SKILL.md has slug: %q which does not match directory %q; using the directory as the authoritative slug",
159  			path, fm.Slug, dirName)
160  	}
161  	if fm.Description == "" {
162  		return nil, fmt.Errorf("skill description is required")
163  	}
164  	var allowedTools []string
165  	if fm.AllowedTools != "" {
166  		allowedTools = strings.Fields(fm.AllowedTools)
167  	}
168  	prompt := strings.TrimSpace(string(body))
169  	override := strings.TrimSpace(fm.StickySnippet)
170  	snippet := override
171  	if snippet == "" {
172  		snippet = extractStickySnippet(prompt)
173  	}
174  	if snippet == "" {
175  		snippet = fm.Description
176  	}
177  	snippet = truncateStickySnippet(snippet, stickySnippetMaxChars)
178  	return &Skill{
179  		Name:                  fm.Name,
180  		Slug:                  dirName,
181  		Description:           fm.Description,
182  		Prompt:                prompt,
183  		License:               fm.License,
184  		Compatibility:         fm.Compatibility,
185  		Metadata:              fm.Metadata,
186  		AllowedTools:          allowedTools,
187  		StickyInstructions:    fm.StickyInstructions,
188  		StickySnippet:         snippet,
189  		StickySnippetOverride: override,
190  		Hidden:                fm.Hidden,
191  		Source:                source,
192  	}, nil
193  }
194  
// stickySnippetMaxChars caps the per-activation / per-drift reinjection size.
// 400 chars is the budget called out in the task plan — adds to the turn after
// use_skill and after every skill-filter denial, so keep it small.
// The cap is applied by truncateStickySnippet, which counts runes (not bytes)
// and includes the trailing "..." in the budget.
const stickySnippetMaxChars = 400
199  
// imperativeMarkers identify paragraphs with actionable policy language.
// Every entry — EN and CJK alike — is matched as a case-sensitive substring
// anywhere in the paragraph (see hasImperativeMarker). The EN entries rely on
// exact case: caps are a strong imperative signal ("MUST use" vs "must use").
// NOTE(review): because matching is plain substring, an all-caps marker also
// hits inside longer all-caps words (e.g. "ALL " inside "INSTALL ").
var imperativeMarkers = []string{
	// EN — capitalized imperatives (strong signal)
	"MUST", "ALWAYS", "NEVER", "DO NOT", "DON'T",
	"REQUIRED", "ONLY", "ALL ",
	// EN — title-case imperatives as usually written at the start of a
	// sentence (moderate signal). Still matched case-sensitively, and not
	// anchored to the start of a sentence.
	"Never ", "Always ", "Must ", "Use the ", "Do not ",
	// ZH
	"必须", "绝不", "仅限", "总是", "不要", "只能",
	// JA
	"必ず", "決して", "絶対", "禁止", "常に", "使用してください",
}
214  
215  // extractStickySnippet returns a single paragraph from the SKILL.md body
216  // most likely to be actionable guidance. Selection order:
217  //   1. First paragraph containing any imperativeMarker ("MUST", "NEVER",
218  //      "必须", "必ず", …) — these are pre-filtered actionable policy.
219  //   2. First non-heading paragraph — title/boilerplate is skipped.
220  // Newlines within the paragraph are collapsed to single spaces so the
221  // snippet renders cleanly inside a single-line <system-reminder>.
222  // Returns "" when no suitable paragraph is found (caller falls back to
223  // Description). Authors can override with the `sticky-snippet:` frontmatter
224  // field when neither heuristic picks the right paragraph.
225  func extractStickySnippet(body string) string {
226  	if body == "" {
227  		return ""
228  	}
229  	paragraphs := strings.Split(body, "\n\n")
230  
231  	// Pass 1: prefer paragraphs with imperative/policy markers. Ignore
232  	// headings but don't require them to be absent — a paragraph can start
233  	// with "**MUST:** ..." and that's still policy.
234  	for _, p := range paragraphs {
235  		p = strings.TrimSpace(p)
236  		if p == "" {
237  			continue
238  		}
239  		if strings.HasPrefix(p, "#") {
240  			continue
241  		}
242  		if hasImperativeMarker(p) {
243  			return strings.Join(strings.Fields(p), " ")
244  		}
245  	}
246  
247  	// Pass 2: fall back to first non-heading paragraph.
248  	for _, p := range paragraphs {
249  		p = strings.TrimSpace(p)
250  		if p == "" {
251  			continue
252  		}
253  		if strings.HasPrefix(p, "#") {
254  			continue
255  		}
256  		return strings.Join(strings.Fields(p), " ")
257  	}
258  	return ""
259  }
260  
261  // hasImperativeMarker reports whether p contains any imperative/policy
262  // marker. EN caps markers require exact case; CJK markers use substring.
263  func hasImperativeMarker(p string) bool {
264  	for _, m := range imperativeMarkers {
265  		if strings.Contains(p, m) {
266  			return true
267  		}
268  	}
269  	return false
270  }
271  
// truncateStickySnippet shortens s to at most max runes without splitting a
// multi-byte character. A shortened result ends in "..." (counted against
// max) so the model can tell the reminder is abbreviated; when max is too
// small to fit the ellipsis, the first max runes are returned bare. A
// non-positive max yields "", and a string that already fits is returned
// unchanged.
func truncateStickySnippet(s string, max int) string {
	const ellipsis = "..."
	if max <= 0 {
		return ""
	}
	chars := []rune(s)
	switch {
	case len(chars) <= max:
		return s
	case max <= len(ellipsis):
		return string(chars[:max])
	default:
		return string(chars[:max-len(ellipsis)]) + ellipsis
	}
}
287  
// warnLegacyYAML logs one warning per legacy extension (.yaml, then .yml)
// when dir directly contains at least one file matching it. Glob errors are
// ignored, so a failed match simply produces no warning.
func warnLegacyYAML(dir string) {
	legacy := []struct {
		glob    string
		warning string
	}{
		{"*.yaml", "WARNING: Found legacy skills/*.yaml files in %s — migrate to SKILL.md format"},
		{"*.yml", "WARNING: Found legacy skills/*.yml files in %s — migrate to SKILL.md format"},
	}
	for _, l := range legacy {
		found, _ := filepath.Glob(filepath.Join(dir, l.glob))
		if len(found) == 0 {
			continue
		}
		log.Printf(l.warning, dir)
	}
}
297  }