/ utils / sanitization.ts
sanitization.ts
 1  /**
 2   * Unicode Sanitization for Hidden Character Attack Mitigation
 3   *
 4   * This module implements security measures against Unicode-based hidden character attacks,
 5   * specifically targeting ASCII Smuggling and Hidden Prompt Injection vulnerabilities.
 6   * These attacks use invisible Unicode characters (such as Tag characters, format controls,
 7   * private use areas, and noncharacters) to hide malicious instructions that are invisible
 8   * to users but processed by AI models.
 9   *
10   * The vulnerability was demonstrated in HackerOne report #3086545 targeting Claude Desktop's
11   * MCP (Model Context Protocol) implementation, where attackers could inject hidden instructions
12   * using Unicode Tag characters that would be executed by Claude but remain invisible to users.
13   *
14   * Reference: https://embracethered.com/blog/posts/2024/hiding-and-finding-text-with-unicode-tags/
15   *
16   * This implementation provides comprehensive protection by:
17   * 1. Applying NFKC Unicode normalization to handle composed character sequences
18   * 2. Removing dangerous Unicode categories while preserving legitimate text and formatting
19   * 3. Supporting recursive sanitization of complex nested data structures
20   * 4. Maintaining performance with efficient regex processing
21   *
22   * The sanitization is always enabled to protect against these attacks.
23   */
24  
/**
 * Strips hidden / invisible Unicode characters from a string.
 *
 * Each pass applies NFKC normalization and then removes format-control (Cf),
 * private-use (Co) and unassigned (Cn) code points, followed by an explicit
 * list of known-dangerous BMP ranges. Passes repeat until the text stops
 * changing, because removing a hidden character can expose a sequence that
 * normalizes differently on the following pass.
 *
 * @param prompt - Text to sanitize.
 * @returns The sanitized text; an empty string is returned as-is.
 * @throws Error if the text is still changing after the maximum number of passes.
 */
export function partiallySanitizeUnicode(prompt: string): string {
  const MAX_ITERATIONS = 10 // Safety limit to prevent infinite loops

  // Method 1: strip dangerous Unicode property classes. This is the primary
  // defence and is the solution that is widely used in OSS libraries.
  const propertyClassPattern = /[\p{Cf}\p{Co}\p{Cn}]/gu

  // Method 2: explicit character ranges, kept as a second line of defence
  // alongside the property-class pattern. Covers, in order:
  //   U+200B-U+200F  zero-width spaces, LTR/RTL marks
  //   U+202A-U+202E  directional formatting characters
  //   U+2066-U+2069  directional isolates
  //   U+FEFF         byte order mark
  //   U+E000-U+F8FF  Basic Multilingual Plane private use
  const explicitRangePattern =
    /[\u200B-\u200F\u202A-\u202E\u2066-\u2069\uFEFF\uE000-\uF8FF]/g

  let current = prompt
  let previous = ''
  let iterations = 0

  // Iteratively sanitize until no more changes occur or max iterations reached
  while (current !== previous && iterations < MAX_ITERATIONS) {
    previous = current
    current = current
      .normalize('NFKC')
      .replace(propertyClassPattern, '')
      .replace(explicitRangePattern, '')
    iterations++
  }

  // Crash loudly only when the text genuinely failed to stabilise. Checking
  // `current !== previous` (rather than the pass counter) avoids rejecting
  // input whose final permitted pass was the one that confirmed convergence.
  if (current !== previous) {
    throw new Error(
      `Unicode sanitization reached maximum iterations (${MAX_ITERATIONS}) for input: ${prompt.slice(0, 100)}`,
    )
  }

  return current
}
66  
/**
 * Applies `partiallySanitizeUnicode` to every string reachable inside a value.
 *
 * - Strings are sanitized directly.
 * - Arrays are mapped element by element into a new array.
 * - Any other non-null object is rebuilt as a fresh plain object from its own
 *   enumerable string-keyed entries, with both keys and values sanitized.
 *   If two keys sanitize to the same string, the later entry wins.
 * - All remaining values (numbers, booleans, null, undefined, ...) are
 *   returned unchanged.
 *
 * @param value - The value to sanitize.
 * @returns A sanitized copy for strings, arrays and objects; otherwise `value` itself.
 */
export function recursivelySanitizeUnicode(value: string): string
export function recursivelySanitizeUnicode<T>(value: T[]): T[]
export function recursivelySanitizeUnicode<T extends object>(value: T): T
export function recursivelySanitizeUnicode<T>(value: T): T
export function recursivelySanitizeUnicode(value: unknown): unknown {
  if (typeof value === 'string') {
    return partiallySanitizeUnicode(value)
  }

  if (Array.isArray(value)) {
    return value.map(recursivelySanitizeUnicode)
  }

  if (value !== null && typeof value === 'object') {
    const sanitized: Record<string, unknown> = {}
    for (const [key, val] of Object.entries(value)) {
      // Define the entry explicitly instead of assigning `sanitized[key] = ...`:
      // plain assignment to a "__proto__" key (e.g. from JSON.parse'd input)
      // would run the inherited setter and replace the result's prototype with
      // untrusted data rather than storing an own property.
      Object.defineProperty(sanitized, recursivelySanitizeUnicode(key), {
        value: recursivelySanitizeUnicode(val),
        writable: true,
        enumerable: true,
        configurable: true,
      })
    }
    return sanitized
  }

  // Return other primitive values (numbers, booleans, null, undefined) unchanged
  return value
}