/ src / utils / frontmatterParser.ts
frontmatterParser.ts
  1  /**
  2   * Frontmatter parser for markdown files
  3   * Extracts and parses YAML frontmatter between --- delimiters
  4   */
  5  
  6  import { logForDebugging } from './debug.js'
  7  import type { HooksSettings } from './settings/types.js'
  8  import { parseYaml } from './yaml.js'
  9  
/**
 * Keys recognized in the YAML frontmatter of markdown files.
 *
 * Every known key is optional and may be `null`, because YAML can return null
 * for keys with no value (e.g., "key:" with nothing after). Keys that are not
 * listed here are still permitted by the index signature and typed `unknown`.
 */
export type FrontmatterData = {
  /** Either a single string or a YAML list of strings. */
  'allowed-tools'?: string | string[] | null
  description?: string | null
  /**
   * Memory type: 'user', 'feedback', 'project', or 'reference'.
   * Only applicable to memory files; narrowed via parseMemoryType() in src/memdir/memoryTypes.ts
   */
  type?: string | null
  'argument-hint'?: string | null
  when_to_use?: string | null
  version?: string | null
  /**
   * Only applicable to slash commands -- a string similar to a boolean env var
   * to determine whether to make them visible to the SlashCommand tool.
   */
  'hide-from-slash-command-tool'?: string | null
  /**
   * Model alias or name (e.g., 'haiku', 'sonnet', 'opus', or specific model names).
   * Use 'inherit' for commands to use the parent model.
   */
  model?: string | null
  /** Comma-separated list of skill names to preload (only applicable to agents). */
  skills?: string | null
  /**
   * Whether users can invoke this skill by typing /skill-name.
   * 'true' = user can type /skill-name to invoke
   * 'false' = only model can invoke via Skill tool
   * Default depends on source: commands/ defaults to true, skills/ defaults to false
   */
  'user-invocable'?: string | null
  /**
   * Hooks to register when this skill is invoked.
   * Keys are hook events (PreToolUse, PostToolUse, Stop, etc.);
   * values are arrays of matcher configurations with hooks.
   * Validated by HooksSchema in loadSkillsDir.ts
   */
  hooks?: HooksSettings | null
  /**
   * Effort level for agents (e.g., 'low', 'medium', 'high', 'max', or an integer).
   * Controls the thinking effort used by the agent's model.
   */
  effort?: string | null
  /**
   * Execution context for skills: 'inline' (default) or 'fork' (run as sub-agent).
   * 'inline' = skill content expands into the current conversation
   * 'fork' = skill runs in a sub-agent with separate context and token budget
   */
  context?: 'inline' | 'fork' | null
  /**
   * Agent type to use when forked (e.g., 'Bash', 'general-purpose').
   * Only applicable when context is 'fork'.
   */
  agent?: string | null
  /**
   * Glob patterns for file paths this skill applies to. Accepts either a
   * comma-separated string or a YAML list of strings.
   * When set, the skill is only activated when the model touches matching files.
   * Uses the same format as CLAUDE.md paths frontmatter.
   */
  paths?: string | string[] | null
  /**
   * Shell to use for !`cmd` and ```! blocks in skill/command .md content.
   * 'bash' (default) or 'powershell'. File-scoped — applies to all !-blocks.
   * Never consults settings.defaultShell: skills are portable across platforms,
   * so the author picks the shell, not the reader. See docs/design/ps-shell-selection.md §5.3.
   */
  shell?: string | null
  /** Any other frontmatter key; callers must narrow the value before use. */
  [key: string]: unknown
}
 60  
/**
 * Result of parseFrontmatter(): the frontmatter data plus the remaining markdown.
 */
export type ParsedMarkdown = {
  // Parsed frontmatter mapping; an empty object when the file has no
  // frontmatter block or the block could not be parsed into a mapping.
  frontmatter: FrontmatterData
  // The markdown with the matched frontmatter block removed, or the full
  // input when no frontmatter block was found.
  content: string
}
 65  
 66  // Characters that require quoting in YAML values (when unquoted)
 67  // - { } are flow mapping indicators
 68  // - * is anchor/alias indicator
 69  // - [ ] are flow sequence indicators
 70  // - ': ' (colon followed by space) is key indicator — causes 'Nested mappings
 71  //   are not allowed in compact mappings' when it appears mid-value. Match the
 72  //   pattern rather than bare ':' so '12:34' times and 'https://' URLs stay unquoted.
 73  // - # is comment indicator
 74  // - & is anchor indicator
 75  // - ! is tag indicator
 76  // - | > are block scalar indicators (only at start)
 77  // - % is directive indicator (only at start)
 78  // - @ ` are reserved
 79  const YAML_SPECIAL_CHARS = /[{}[\]*&#!|>%@`]|: /
 80  
 81  /**
 82   * Pre-processes frontmatter text to quote values that contain special YAML characters.
 83   * This allows glob patterns like **\/*.{ts,tsx} to be parsed correctly.
 84   */
 85  function quoteProblematicValues(frontmatterText: string): string {
 86    const lines = frontmatterText.split('\n')
 87    const result: string[] = []
 88  
 89    for (const line of lines) {
 90      // Match simple key: value lines (not indented, not list items, not block scalars)
 91      const match = line.match(/^([a-zA-Z_-]+):\s+(.+)$/)
 92      if (match) {
 93        const [, key, value] = match
 94        if (!key || !value) {
 95          result.push(line)
 96          continue
 97        }
 98  
 99        // Skip if already quoted
100        if (
101          (value.startsWith('"') && value.endsWith('"')) ||
102          (value.startsWith("'") && value.endsWith("'"))
103        ) {
104          result.push(line)
105          continue
106        }
107  
108        // Quote if contains special YAML characters
109        if (YAML_SPECIAL_CHARS.test(value)) {
110          // Use double quotes and escape any existing double quotes
111          const escaped = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
112          result.push(`${key}: "${escaped}"`)
113          continue
114        }
115      }
116  
117      result.push(line)
118    }
119  
120    return result.join('\n')
121  }
122  
/**
 * Matches a frontmatter block at the very start of a document.
 *
 * - The opening `---` must be the first thing in the input.
 * - Capture group 1 is the text between the delimiters (including its
 *   trailing newline; empty for an empty block).
 * - The closing `---` must be the first non-blank thing on its line. The
 *   lookbehind enforces this so that a `---` appearing in the middle of a
 *   value (e.g. `description: a---b`) is not mistaken for the closing delimiter.
 * - Whitespace following the closing delimiter is consumed by the match.
 */
export const FRONTMATTER_REGEX = /^---\s*\n([\s\S]*?)(?<=\n[ \t]*)---\s*\n?/
124  
/**
 * Splits a markdown document into its YAML frontmatter and its body.
 *
 * The frontmatter text is parsed as YAML. If that throws, it is parsed a
 * second time after quoting values that contain special YAML characters. If
 * the second attempt also throws, a warning is logged and the frontmatter is
 * left empty. A parse result that is not a plain mapping (null, scalar,
 * array) also yields empty frontmatter.
 *
 * @param markdown The raw markdown content
 * @param sourcePath Optional path of the file, used only in the warning message
 * @returns The parsed frontmatter and the content with the frontmatter block removed;
 *          when no frontmatter block is present, empty frontmatter and the input unchanged
 */
export function parseFrontmatter(
  markdown: string,
  sourcePath?: string,
): ParsedMarkdown {
  const delimited = markdown.match(FRONTMATTER_REGEX)
  if (!delimited) {
    // No frontmatter block: the whole document is content
    return { frontmatter: {}, content: markdown }
  }

  const yamlText = delimited[1] || ''
  const body = markdown.slice(delimited[0].length)

  // Parses YAML text and accepts the result only when it is a mapping.
  // Throws whatever parseYaml throws.
  const toMapping = (text: string): FrontmatterData | undefined => {
    const doc = parseYaml(text) as FrontmatterData | null
    const isMapping = !!doc && typeof doc === 'object' && !Array.isArray(doc)
    return isMapping ? (doc as FrontmatterData) : undefined
  }

  let data: FrontmatterData = {}
  try {
    data = toMapping(yamlText) ?? data
  } catch {
    // First attempt threw - retry with problematic values quoted
    try {
      data = toMapping(quoteProblematicValues(yamlText)) ?? data
    } catch (retryError) {
      // Both attempts threw - log so users can diagnose broken frontmatter
      const location = sourcePath ? ` in ${sourcePath}` : ''
      const reason =
        retryError instanceof Error ? retryError.message : retryError
      logForDebugging(`Failed to parse YAML frontmatter${location}: ${reason}`, {
        level: 'warn',
      })
    }
  }

  return { frontmatter: data, content: body }
}
176  
/**
 * Splits a comma-separated string and expands brace patterns.
 * Commas inside braces are not treated as separators.
 * Also accepts a YAML list (string array) for ergonomic frontmatter.
 *
 * Entries are trimmed and empty entries are dropped. Any input that is
 * neither a string nor an array (e.g. null from an empty YAML key) yields [].
 * An unmatched closing brace is kept as a literal character and does not
 * prevent later commas from separating patterns.
 *
 * @param input - Comma-separated string, or array of strings, with optional brace patterns
 * @returns Array of expanded strings
 * @example
 * splitPathInFrontmatter("a, b") // returns ["a", "b"]
 * splitPathInFrontmatter("a, src/*.{ts,tsx}") // returns ["a", "src/*.ts", "src/*.tsx"]
 * splitPathInFrontmatter("{a,b}/{c,d}") // returns ["a/c", "a/d", "b/c", "b/d"]
 * splitPathInFrontmatter(["a", "src/*.{ts,tsx}"]) // returns ["a", "src/*.ts", "src/*.tsx"]
 * splitPathInFrontmatter("src/{a,{b,c}}.ts") // returns ["src/a.ts", "src/b.ts", "src/c.ts"]
 */
export function splitPathInFrontmatter(input: string | string[]): string[] {
  if (Array.isArray(input)) {
    return input.flatMap(entry => splitPathInFrontmatter(entry))
  }
  if (typeof input !== 'string') {
    return []
  }

  // Split by comma while respecting braces
  const parts: string[] = []
  let current = ''
  let braceDepth = 0

  const flush = (): void => {
    const trimmed = current.trim()
    if (trimmed) {
      parts.push(trimmed)
    }
    current = ''
  }

  for (const char of input) {
    if (char === ',' && braceDepth === 0) {
      // Comma outside of braces: end of the current pattern
      flush()
      continue
    }
    if (char === '{') {
      braceDepth++
    } else if (char === '}' && braceDepth > 0) {
      // Never go below zero: a stray '}' must not disable comma splitting
      braceDepth--
    }
    current += char
  }
  flush()

  // Expand brace patterns in each part
  return parts.flatMap(pattern => expandBraces(pattern))
}

/**
 * Expands brace patterns in a glob string, including nested groups.
 *
 * The first brace group is located together with its matching closing brace;
 * its comma-separated alternatives (trimmed) are substituted one at a time and
 * the result is expanded recursively. Patterns without a brace group, with an
 * unclosed group, or whose first group is empty (`{}`) are returned as-is.
 *
 * @example
 * expandBraces("src/*.{ts,tsx}") // returns ["src/*.ts", "src/*.tsx"]
 * expandBraces("{a,b}/{c,d}") // returns ["a/c", "a/d", "b/c", "b/d"]
 * expandBraces("{a,{b,c}}") // returns ["a", "b", "c"]
 */
function expandBraces(pattern: string): string[] {
  const open = pattern.indexOf('{')
  if (open === -1) {
    // No braces found, return pattern as-is
    return [pattern]
  }

  // Walk to the brace that closes the first group, collecting the
  // alternatives separated by commas that belong to this group only
  // (depth 1); commas of nested groups are left for the recursive call.
  const alternatives: string[] = []
  let depth = 0
  let close = -1
  let segmentStart = open + 1
  for (let i = open; i < pattern.length; i++) {
    const char = pattern[i]
    if (char === '{') {
      depth++
    } else if (char === '}') {
      depth--
      if (depth === 0) {
        close = i
        break
      }
    } else if (char === ',' && depth === 1) {
      alternatives.push(pattern.slice(segmentStart, i))
      segmentStart = i + 1
    }
  }

  if (close === -1 || close === open + 1) {
    // Unclosed or empty group: nothing to expand
    return [pattern]
  }
  alternatives.push(pattern.slice(segmentStart, close))

  const prefix = pattern.slice(0, open)
  const suffix = pattern.slice(close + 1)

  // Each substitution removes one brace pair, so the recursion terminates
  return alternatives.flatMap(alternative =>
    expandBraces(prefix + alternative.trim() + suffix),
  )
}
267  
/**
 * Parses a positive integer value from frontmatter.
 * Handles both number and string representations.
 *
 * Strings must consist solely of decimal digits (optionally preceded by '+'
 * and surrounded by whitespace). Values such as "12abc" or "1.5" are rejected
 * rather than truncated, so a string is accepted exactly when the equivalent
 * number would be.
 *
 * @param value The raw value from frontmatter (could be number, string, or undefined)
 * @returns The parsed positive integer, or undefined if invalid or not provided
 */
export function parsePositiveIntFromFrontmatter(
  value: unknown,
): number | undefined {
  let parsed: number
  if (typeof value === 'number') {
    parsed = value
  } else if (typeof value === 'string') {
    const text = value.trim()
    // parseInt alone would silently accept trailing garbage ("12abc" -> 12)
    if (!/^\+?\d+$/.test(text)) {
      return undefined
    }
    parsed = parseInt(text, 10)
  } else {
    // undefined, null, and non-scalar values are not valid integers
    return undefined
  }

  return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined
}
290  
/**
 * Validate and coerce a description value from frontmatter.
 *
 * Strings are returned trimmed. Primitive values (numbers, booleans)
 * are coerced to strings via String(). Non-scalar values (arrays, objects)
 * are invalid and are logged then omitted. Null, undefined, and
 * empty/whitespace-only strings return null so callers can fall back to
 * a default.
 *
 * @param value - The raw frontmatter description value
 * @param componentName - The skill/command/agent/style name for log messages;
 *   shown as 'unknown' when omitted
 * @param pluginName - The plugin name, if this came from a plugin
 * @returns The description string, or null when absent, empty, or invalid
 */
export function coerceDescriptionToString(
  value: unknown,
  componentName?: string,
  pluginName?: string,
): string | null {
  if (value == null) {
    return null
  }
  if (typeof value === 'string') {
    return value.trim() || null
  }
  if (typeof value === 'number' || typeof value === 'boolean') {
    return String(value)
  }
  // Non-scalar descriptions (arrays, objects) are invalid — log and omit.
  // Apply the 'unknown' fallback before prefixing the plugin name so the log
  // never reads "plugin:undefined".
  const component = componentName ?? 'unknown'
  const source = pluginName ? `${pluginName}:${component}` : component
  logForDebugging(`Description invalid for ${source} - omitting`, {
    level: 'warn',
  })
  return null
}
327  
/**
 * Interprets a frontmatter value as a boolean flag.
 *
 * Real booleans are returned unchanged; of all other values only the exact
 * string "true" counts as true. Everything else (including "True", "yes",
 * 1, null, and undefined) is false.
 */
export function parseBooleanFrontmatter(value: unknown): boolean {
  if (typeof value === 'boolean') {
    return value
  }
  return value === 'true'
}
335  
// Single source of truth for the accepted shells; the exported type below is
// derived from this list so the two cannot drift apart.
const FRONTMATTER_SHELLS = ['bash', 'powershell'] as const

/**
 * Shell values accepted in `shell:` frontmatter for .md `!`-block execution.
 */
export type FrontmatterShell = (typeof FRONTMATTER_SHELLS)[number]

/**
 * Parse and validate the `shell:` frontmatter field.
 *
 * The value is stringified, trimmed, and lower-cased before comparison.
 * Returns undefined for absent/null/empty (caller defaults to bash).
 * Logs a warning and returns undefined for unrecognized values — we fall
 * back to bash rather than failing the skill load, matching how `effort`
 * and other fields degrade.
 *
 * @param value The raw `shell` value from frontmatter
 * @param source Identifies where the frontmatter came from; used in the warning
 * @returns The recognized shell, or undefined when absent, empty, or unrecognized
 */
export function parseShellFrontmatter(
  value: unknown,
  source: string,
): FrontmatterShell | undefined {
  const normalized = value == null ? '' : String(value).trim().toLowerCase()
  if (normalized === '') {
    return undefined
  }

  const recognized = FRONTMATTER_SHELLS.find(shell => shell === normalized)
  if (recognized !== undefined) {
    return recognized
  }

  logForDebugging(
    `Frontmatter 'shell: ${value}' in ${source} is not recognized. Valid values: ${FRONTMATTER_SHELLS.join(', ')}. Falling back to bash.`,
    { level: 'warn' },
  )
  return undefined
}