/ utils / markdown.ts
markdown.ts
  1  import chalk from 'chalk'
  2  import { marked, type Token, type Tokens } from 'marked'
  3  import stripAnsi from 'strip-ansi'
  4  import { color } from '../components/design-system/color.js'
  5  import { BLOCKQUOTE_BAR } from '../constants/figures.js'
  6  import { stringWidth } from '../ink/stringWidth.js'
  7  import { supportsHyperlinks } from '../ink/supports-hyperlinks.js'
  8  import type { CliHighlight } from './cliHighlight.js'
  9  import { logForDebugging } from './debug.js'
 10  import { createHyperlink } from './hyperlink.js'
 11  import { stripPromptXMLTags } from './messages.js'
 12  import type { ThemeName } from './theme.js'
 13  
// Line terminator used for everything this module renders.
// Use \n unconditionally — os.EOL is \r\n on Windows, and the extra \r
// breaks the character-to-segment mapping in applyStylesToWrappedText,
// causing styled text to shift right.
const EOL = '\n'
 18  
 19  let markedConfigured = false
 20  
 21  export function configureMarked(): void {
 22    if (markedConfigured) return
 23    markedConfigured = true
 24  
 25    // Disable strikethrough parsing - the model often uses ~ for "approximate"
 26    // (e.g., ~100) and rarely intends actual strikethrough formatting
 27    marked.use({
 28      tokenizer: {
 29        del() {
 30          return undefined
 31        },
 32      },
 33    })
 34  }
 35  
 36  export function applyMarkdown(
 37    content: string,
 38    theme: ThemeName,
 39    highlight: CliHighlight | null = null,
 40  ): string {
 41    configureMarked()
 42    return marked
 43      .lexer(stripPromptXMLTags(content))
 44      .map(_ => formatToken(_, theme, 0, null, null, highlight))
 45      .join('')
 46      .trim()
 47  }
 48  
 49  export function formatToken(
 50    token: Token,
 51    theme: ThemeName,
 52    listDepth = 0,
 53    orderedListNumber: number | null = null,
 54    parent: Token | null = null,
 55    highlight: CliHighlight | null = null,
 56  ): string {
 57    switch (token.type) {
 58      case 'blockquote': {
 59        const inner = (token.tokens ?? [])
 60          .map(_ => formatToken(_, theme, 0, null, null, highlight))
 61          .join('')
 62        // Prefix each line with a dim vertical bar. Keep text italic but at
 63        // normal brightness — chalk.dim is nearly invisible on dark themes.
 64        const bar = chalk.dim(BLOCKQUOTE_BAR)
 65        return inner
 66          .split(EOL)
 67          .map(line =>
 68            stripAnsi(line).trim() ? `${bar} ${chalk.italic(line)}` : line,
 69          )
 70          .join(EOL)
 71      }
 72      case 'code': {
 73        if (!highlight) {
 74          return token.text + EOL
 75        }
 76        let language = 'plaintext'
 77        if (token.lang) {
 78          if (highlight.supportsLanguage(token.lang)) {
 79            language = token.lang
 80          } else {
 81            logForDebugging(
 82              `Language not supported while highlighting code, falling back to plaintext: ${token.lang}`,
 83            )
 84          }
 85        }
 86        return highlight.highlight(token.text, { language }) + EOL
 87      }
 88      case 'codespan': {
 89        // inline code
 90        return color('permission', theme)(token.text)
 91      }
 92      case 'em':
 93        return chalk.italic(
 94          (token.tokens ?? [])
 95            .map(_ => formatToken(_, theme, 0, null, parent, highlight))
 96            .join(''),
 97        )
 98      case 'strong':
 99        return chalk.bold(
100          (token.tokens ?? [])
101            .map(_ => formatToken(_, theme, 0, null, parent, highlight))
102            .join(''),
103        )
104      case 'heading':
105        switch (token.depth) {
106          case 1: // h1
107            return (
108              chalk.bold.italic.underline(
109                (token.tokens ?? [])
110                  .map(_ => formatToken(_, theme, 0, null, null, highlight))
111                  .join(''),
112              ) +
113              EOL +
114              EOL
115            )
116          case 2: // h2
117            return (
118              chalk.bold(
119                (token.tokens ?? [])
120                  .map(_ => formatToken(_, theme, 0, null, null, highlight))
121                  .join(''),
122              ) +
123              EOL +
124              EOL
125            )
126          default: // h3+
127            return (
128              chalk.bold(
129                (token.tokens ?? [])
130                  .map(_ => formatToken(_, theme, 0, null, null, highlight))
131                  .join(''),
132              ) +
133              EOL +
134              EOL
135            )
136        }
137      case 'hr':
138        return '---'
139      case 'image':
140        return token.href
141      case 'link': {
142        // Prevent mailto links from being displayed as clickable links
143        if (token.href.startsWith('mailto:')) {
144          // Extract email from mailto: link and display as plain text
145          const email = token.href.replace(/^mailto:/, '')
146          return email
147        }
148        // Extract display text from the link's child tokens
149        const linkText = (token.tokens ?? [])
150          .map(_ => formatToken(_, theme, 0, null, token, highlight))
151          .join('')
152        const plainLinkText = stripAnsi(linkText)
153        // If the link has meaningful display text (different from the URL),
154        // show it as a clickable hyperlink. In terminals that support OSC 8,
155        // users see the text and can hover/click to see the URL.
156        if (plainLinkText && plainLinkText !== token.href) {
157          return createHyperlink(token.href, linkText)
158        }
159        // When the display text matches the URL (or is empty), just show the URL
160        return createHyperlink(token.href)
161      }
162      case 'list': {
163        return token.items
164          .map((_: Token, index: number) =>
165            formatToken(
166              _,
167              theme,
168              listDepth,
169              token.ordered ? token.start + index : null,
170              token,
171              highlight,
172            ),
173          )
174          .join('')
175      }
176      case 'list_item':
177        return (token.tokens ?? [])
178          .map(
179            _ =>
180              `${'  '.repeat(listDepth)}${formatToken(_, theme, listDepth + 1, orderedListNumber, token, highlight)}`,
181          )
182          .join('')
183      case 'paragraph':
184        return (
185          (token.tokens ?? [])
186            .map(_ => formatToken(_, theme, 0, null, null, highlight))
187            .join('') + EOL
188        )
189      case 'space':
190        return EOL
191      case 'br':
192        return EOL
193      case 'text':
194        if (parent?.type === 'link') {
195          // Already inside a markdown link — the link handler will wrap this
196          // in an OSC 8 hyperlink. Linkifying here would nest a second OSC 8
197          // sequence, and terminals honor the innermost one, overriding the
198          // link's actual href.
199          return token.text
200        }
201        if (parent?.type === 'list_item') {
202          return `${orderedListNumber === null ? '-' : getListNumber(listDepth, orderedListNumber) + '.'} ${token.tokens ? token.tokens.map(_ => formatToken(_, theme, listDepth, orderedListNumber, token, highlight)).join('') : linkifyIssueReferences(token.text)}${EOL}`
203        }
204        return linkifyIssueReferences(token.text)
205      case 'table': {
206        const tableToken = token as Tokens.Table
207  
208        // Helper function to get the text content that will be displayed (after stripAnsi)
209        function getDisplayText(tokens: Token[] | undefined): string {
210          return stripAnsi(
211            tokens
212              ?.map(_ => formatToken(_, theme, 0, null, null, highlight))
213              .join('') ?? '',
214          )
215        }
216  
217        // Determine column widths based on displayed content (without formatting)
218        const columnWidths = tableToken.header.map((header, index) => {
219          let maxWidth = stringWidth(getDisplayText(header.tokens))
220          for (const row of tableToken.rows) {
221            const cellLength = stringWidth(getDisplayText(row[index]?.tokens))
222            maxWidth = Math.max(maxWidth, cellLength)
223          }
224          return Math.max(maxWidth, 3) // Minimum width of 3
225        })
226  
227        // Format header row
228        let tableOutput = '| '
229        tableToken.header.forEach((header, index) => {
230          const content =
231            header.tokens
232              ?.map(_ => formatToken(_, theme, 0, null, null, highlight))
233              .join('') ?? ''
234          const displayText = getDisplayText(header.tokens)
235          const width = columnWidths[index]!
236          const align = tableToken.align?.[index]
237          tableOutput +=
238            padAligned(content, stringWidth(displayText), width, align) + ' | '
239        })
240        tableOutput = tableOutput.trimEnd() + EOL
241  
242        // Add separator row
243        tableOutput += '|'
244        columnWidths.forEach(width => {
245          // Always use dashes, don't show alignment colons in the output
246          const separator = '-'.repeat(width + 2) // +2 for spaces on each side
247          tableOutput += separator + '|'
248        })
249        tableOutput += EOL
250  
251        // Format data rows
252        tableToken.rows.forEach(row => {
253          tableOutput += '| '
254          row.forEach((cell, index) => {
255            const content =
256              cell.tokens
257                ?.map(_ => formatToken(_, theme, 0, null, null, highlight))
258                .join('') ?? ''
259            const displayText = getDisplayText(cell.tokens)
260            const width = columnWidths[index]!
261            const align = tableToken.align?.[index]
262            tableOutput +=
263              padAligned(content, stringWidth(displayText), width, align) + ' | '
264          })
265          tableOutput = tableOutput.trimEnd() + EOL
266        })
267  
268        return tableOutput + EOL
269      }
270      case 'escape':
271        // Markdown escape: \) → ), \\ → \, etc.
272        return token.text
273      case 'def':
274      case 'del':
275      case 'html':
276        // These token types are not rendered
277        return ''
278    }
279    return ''
280  }
281  
// Matches owner/repo#NNN style GitHub issue/PR references. The qualified form
// is unambiguous — bare #NNN was removed because it guessed the current repo
// and was wrong whenever the assistant discussed a different one.
// Owner segment disallows dots (GitHub usernames are alphanumerics + hyphens
// only) so hostnames like docs.github.io/guide#42 don't false-positive. Repo
// segment allows dots (e.g. cc.kurs.web). Note that \w also lets underscores
// through in both segments.
// Capture groups: 1 = the character before the reference (empty at start of
// input), 2 = owner/repo, 3 = the issue number. Group 1 stands in for a
// lookbehind, which is avoided because it defeats YARR JIT in JSC; callers
// must re-emit it when replacing a match.
const ISSUE_REF_PATTERN =
  /(^|[^\w./-])([A-Za-z0-9][\w-]*\/[A-Za-z0-9][\w.-]*)#(\d+)\b/g
291  
/**
 * Turns every owner/repo#123 reference in `text` into a clickable hyperlink
 * to the matching GitHub issue URL, keeping the reference itself as the
 * visible label. When hyperlinks are not supported the text is returned
 * unchanged.
 */
function linkifyIssueReferences(text: string): string {
  if (supportsHyperlinks()) {
    return text.replace(ISSUE_REF_PATTERN, (_whole, lead, slug, issue) => {
      const url = `https://github.com/${slug}/issues/${issue}`
      const label = `${slug}#${issue}`
      // `lead` is the character matched just before the reference; put it
      // back so only the reference itself becomes the link.
      return lead + createHyperlink(url, label)
    })
  }
  return text
}
309  
/**
 * Converts a 1-based ordinal into a lowercase alphabetic label using
 * bijective base 26: 1 → a, 26 → z, 27 → aa, 28 → ab, …
 *
 * Values that have no alphabetic form (zero, negatives, non-integers, NaN,
 * ±Infinity) are returned as their decimal string. This keeps a list that
 * starts at 0 from getting an empty marker and keeps Infinity from looping
 * forever.
 *
 * @param n - The ordinal to convert.
 * @returns The alphabetic label, or `n.toString()` when `n` is not a
 *   positive integer.
 */
function numberToLetter(n: number): string {
  if (!Number.isInteger(n) || n < 1) {
    return n.toString()
  }
  let remaining = n
  let result = ''
  while (remaining > 0) {
    // Shift to 0-based so that 26 maps to 'z' rather than rolling over.
    remaining--
    result = String.fromCharCode(97 + (remaining % 26)) + result
    remaining = Math.floor(remaining / 26)
  }
  return result
}
319  
// Roman numeral building blocks in descending order, including the
// subtractive pairs (cm, cd, xc, xl, ix, iv), so a greedy pass yields the
// standard form.
const ROMAN_VALUES: ReadonlyArray<[number, string]> = [
  [1000, 'm'],
  [900, 'cm'],
  [500, 'd'],
  [400, 'cd'],
  [100, 'c'],
  [90, 'xc'],
  [50, 'l'],
  [40, 'xl'],
  [10, 'x'],
  [9, 'ix'],
  [5, 'v'],
  [4, 'iv'],
  [1, 'i'],
]

/**
 * Converts a positive integer into a lowercase Roman numeral
 * (1 → i, 4 → iv, 1994 → mcmxciv).
 *
 * Values that have no Roman form (zero, negatives, non-integers, NaN,
 * ±Infinity) are returned as their decimal string. This keeps a list that
 * starts at 0 from getting an empty marker and keeps Infinity from looping
 * forever.
 *
 * @param n - The number to convert.
 * @returns The Roman numeral, or `n.toString()` when `n` is not a positive
 *   integer.
 */
function numberToRoman(n: number): string {
  if (!Number.isInteger(n) || n < 1) {
    return n.toString()
  }
  let remaining = n
  let result = ''
  for (const [value, numeral] of ROMAN_VALUES) {
    // Greedily take the largest remaining value as often as it fits.
    while (remaining >= value) {
      result += numeral
      remaining -= value
    }
  }
  return result
}
346  
/**
 * Picks the numbering style for an ordered-list item from its nesting depth:
 * depth 2 uses letters (numberToLetter), depth 3 uses Roman numerals
 * (numberToRoman), and every other depth uses plain decimal digits.
 *
 * @param listDepth - Nesting depth of the item being numbered.
 * @param orderedListNumber - The item's ordinal within its list.
 * @returns The marker text, without the trailing period.
 */
function getListNumber(listDepth: number, orderedListNumber: number): string {
  if (listDepth === 2) {
    return numberToLetter(orderedListNumber)
  }
  if (listDepth === 3) {
    return numberToRoman(orderedListNumber)
  }
  return orderedListNumber.toString()
}
360  
/**
 * Pads `content` with spaces so it occupies `targetWidth` columns.
 *
 * The caller supplies `displayWidth`, the visible width of `content` (e.g.
 * stringWidth of the stripAnsi'd text), so ANSI codes inside `content` never
 * influence how much padding is added. Content already at or beyond the
 * target width is returned unpadded.
 *
 * @param content - Text to pad; may contain ANSI styling.
 * @param displayWidth - Visible width of `content`.
 * @param targetWidth - Desired total visible width.
 * @param align - 'center' splits the padding (the extra space goes on the
 *   right), 'right' pads on the left, anything else pads on the right.
 * @returns The padded text.
 */
export function padAligned(
  content: string,
  displayWidth: number,
  targetWidth: number,
  align: 'left' | 'center' | 'right' | null | undefined,
): string {
  const slack = Math.max(0, targetWidth - displayWidth)

  // Work out how much of the slack goes in front of the content; whatever
  // is left over goes behind it.
  let before = 0
  if (align === 'center') {
    before = Math.floor(slack / 2)
  } else if (align === 'right') {
    before = slack
  }
  const after = slack - before

  return ' '.repeat(before) + content + ' '.repeat(after)
}