text-normalization.ts
/**
 * Shared text-normalization utilities.
 *
 * Consolidates the cleanText / cleanMultiline / normalizeList helpers that
 * were previously duplicated across 7+ server modules.
 */

/**
 * Cap `text` at `max` UTF-16 code units without splitting a surrogate pair.
 *
 * A plain `slice` can cut between the two halves of an astral character
 * (e.g. an emoji) and leave a lone high surrogate at the end of the result.
 * When that happens the dangling half is dropped, so the result is never
 * longer than what `text.slice(0, max)` would return.
 */
function truncateSafely(text: string, max: number): string {
  const cut = text.slice(0, max)
  const last = cut.charCodeAt(cut.length - 1)
  const next = text.charCodeAt(cut.length)
  // High surrogates are 0xD800–0xDBFF, low surrogates 0xDC00–0xDFFF.
  // `charCodeAt` yields NaN out of range, which fails every comparison below.
  const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff
  return splitsPair ? cut.slice(0, -1) : cut
}

/**
 * Collapse every whitespace run to a single space, trim, and cap at `max` characters.
 *
 * @param value - Untrusted input; anything that is not a string yields `''`.
 * @param max - Maximum length of the result in UTF-16 code units.
 * @returns The normalized single-line text, with no leading or trailing whitespace.
 */
export function cleanText(value: unknown, max = 320): string {
  if (typeof value !== 'string') return ''
  const collapsed = value.replace(/\s+/g, ' ').trim()
  // Trim again after capping: the cut can land right after a collapsed space.
  return truncateSafely(collapsed, max).trim()
}

/**
 * Trim each line, drop blank lines, rejoin with `\n`, and cap at `max` characters.
 *
 * @param value - Untrusted input; anything that is not a string yields `''`.
 * @param max - Maximum length of the result in UTF-16 code units.
 * @returns The normalized multi-line text, with no leading or trailing whitespace.
 */
export function cleanMultiline(value: unknown, max = 1_200): string {
  if (typeof value !== 'string') return ''
  const joined = value
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean)
    .join('\n')
  // Trim after capping so a cut that ends on a newline leaves no trailing whitespace.
  return truncateSafely(joined, max).trim()
}

/**
 * Deduplicated, cleaned list of strings from unknown input.
 *
 * Each entry is passed through `cleanText` with `maxChars`; entries that come
 * out empty (including non-string entries) are skipped. Duplicates are detected
 * case-insensitively on the cleaned (already truncated) text, and the first
 * spelling encountered is the one kept.
 *
 * @param input - Untrusted input; anything that is not an array yields `[]`.
 * @param maxItems - Maximum number of entries to return; `0` or less yields `[]`.
 * @param maxChars - Per-entry length cap forwarded to `cleanText`.
 * @returns The cleaned entries in their original order.
 */
export function normalizeList(input: unknown, maxItems: number, maxChars = 240): string[] {
  const out: string[] = []
  // Without this guard the first entry would be pushed before the limit is checked.
  if (maxItems <= 0) return out
  const values = Array.isArray(input) ? input : []
  const seen = new Set<string>()
  for (const value of values) {
    const cleaned = cleanText(value, maxChars)
    if (!cleaned) continue
    const key = cleaned.toLowerCase()
    if (seen.has(key)) continue
    seen.add(key)
    out.push(cleaned)
    if (out.length >= maxItems) break
  }
  return out
}