// src/lib/server/text-normalization.ts
/**
 * Shared text-normalization utilities.
 *
 * Consolidates the cleanText / cleanMultiline / normalizeList helpers that
 * were previously duplicated across 7+ server modules.
 */
 7  
 8  /** Collapse whitespace, trim, and cap at `max` characters. Returns `''` for non-string input. */
 9  export function cleanText(value: unknown, max = 320): string {
10    if (typeof value !== 'string') return ''
11    return value.replace(/\s+/g, ' ').trim().slice(0, max)
12  }
13  
/**
 * Trim each line, drop blank lines, rejoin with `\n`, and cap the result at
 * `max` UTF-16 code units.
 *
 * Lines are split on `\n`, `\r\n`, and lone `\r`. Whitespace inside a line is
 * left untouched. The truncated result is trimmed again and never ends in the
 * first half of a surrogate pair, so it may be slightly shorter than `max`.
 *
 * @param value - Candidate text; anything that is not a string yields `''`.
 * @param max - Maximum length of the result. Non-positive or NaN values yield `''`.
 * @returns The normalized, length-capped multi-line string.
 */
export function cleanMultiline(value: unknown, max = 1_200): string {
  if (typeof value !== 'string') return ''
  const joined = value
    .split(/\r\n?|\n/)
    .map((line) => line.trim())
    .filter(Boolean)
    .join('\n')
  // Clamp so a negative cap cannot be read by `slice` as "count from the end".
  const limit = max > 0 ? Math.floor(max) : 0
  if (joined.length <= limit) return joined
  // Step back one unit if the cut would land between a high and a low surrogate.
  const last = joined.charCodeAt(limit - 1)
  const next = joined.charCodeAt(limit)
  const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff
  const end = splitsPair ? limit - 1 : limit
  // The cut can land on a newline or inside trailing whitespace; trim it away.
  return joined.slice(0, end).trim()
}
25  
/**
 * Build a deduplicated list of cleaned strings from unknown input.
 *
 * Each element is passed through `cleanText(value, maxChars)`; elements that
 * clean to the empty string are skipped. Duplicates are detected
 * case-insensitively and the first spelling encountered is kept. Input order
 * is preserved.
 *
 * @param input - Candidate list; anything that is not an array yields `[]`.
 * @param maxItems - Maximum number of entries to return. A non-positive or NaN
 *   value yields `[]`.
 * @param maxChars - Per-item length cap forwarded to `cleanText`.
 * @returns The cleaned, deduplicated entries, at most `maxItems` long.
 */
export function normalizeList(input: unknown, maxItems: number, maxChars = 240): string[] {
  // The cap is checked after each push below, so without this guard a cap of
  // zero (or less) would still let the first item through.
  if (!Array.isArray(input) || !(maxItems > 0)) return []
  const seen = new Set<string>()
  const out: string[] = []
  for (const value of input) {
    const cleaned = cleanText(value, maxChars)
    if (!cleaned) continue
    // Lower-cased key so "Foo" and "foo" count as the same entry.
    const key = cleaned.toLowerCase()
    if (seen.has(key)) continue
    seen.add(key)
    out.push(cleaned)
    if (out.length >= maxItems) break
  }
  return out
}