# test_ansi_strip.py
"""Test suite for ANSI escape sequence stripping (ECMA-48).

``strip_ansi`` lives in tools/ansi_strip.py and is the source-level fix for
ANSI codes leaking into the model's context through terminal/execute_code
output.  Every terminal escape sequence has to be removed, and legitimate
text has to come through unchanged.
"""

from tools.ansi_strip import strip_ansi


class TestStripAnsiBasicSGR:
    """SGR (Select Graphic Rendition): the most frequently seen sequences."""

    def test_reset(self):
        stripped = strip_ansi("\x1b[0m")
        assert stripped == ""

    def test_color(self):
        stripped = strip_ansi("\x1b[31;1m")
        assert stripped == ""

    def test_truecolor_semicolon(self):
        stripped = strip_ansi("\x1b[38;2;255;0;0m")
        assert stripped == ""

    def test_truecolor_colon_separated(self):
        """Colon-separated SGR parameters, as emitted by modern terminals."""
        foreground = "\x1b[38:2:255:0:0m"
        background = "\x1b[48:2:0:255:0m"
        for sequence in (foreground, background):
            assert strip_ansi(sequence) == ""


class TestStripAnsiCSIPrivateMode:
    """CSI sequences carrying the ``?`` prefix (DEC private modes)."""

    def test_cursor_show_hide(self):
        for sequence in ("\x1b[?25h", "\x1b[?25l"):
            assert strip_ansi(sequence) == ""

    def test_alt_screen(self):
        for sequence in ("\x1b[?1049h", "\x1b[?1049l"):
            assert strip_ansi(sequence) == ""

    def test_bracketed_paste(self):
        stripped = strip_ansi("\x1b[?2004h")
        assert stripped == ""


class TestStripAnsiCSIIntermediate:
    """CSI sequences that include intermediate bytes (space, etc.)."""

    def test_cursor_shape(self):
        for sequence in ("\x1b[0 q", "\x1b[2 q", "\x1b[6 q"):
            assert strip_ansi(sequence) == ""


class TestStripAnsiOSC:
    """OSC (Operating System Command) sequences."""

    def test_bel_terminator(self):
        bel_terminated = "\x1b]0;title\x07"
        assert strip_ansi(bel_terminated) == ""

    def test_st_terminator(self):
        st_terminated = "\x1b]0;title\x1b\\"
        assert strip_ansi(st_terminated) == ""

    def test_hyperlink_preserves_text(self):
        # Only the link text between the opening and closing OSC 8 survives.
        hyperlink = "\x1b]8;;https://example.com\x1b\\click\x1b]8;;\x1b\\"
        assert strip_ansi(hyperlink) == "click"


class TestStripAnsiDECPrivate:
    """DEC private / Fp escape sequences."""

    def test_save_restore_cursor(self):
        for sequence in ("\x1b7", "\x1b8"):
            assert strip_ansi(sequence) == ""

    def test_keypad_modes(self):
        for sequence in ("\x1b=", "\x1b>"):
            assert strip_ansi(sequence) == ""


class TestStripAnsiFe:
    """Fe (C1 as 7-bit) escape sequences."""

    def test_reverse_index(self):
        stripped = strip_ansi("\x1bM")
        assert stripped == ""

    def test_reset_terminal(self):
        # NOTE: ESC c has final byte 0x63, which is in the Fs range rather
        # than Fe; it is grouped here with the other two-character escapes.
        stripped = strip_ansi("\x1bc")
        assert stripped == ""

    def test_index_and_newline(self):
        for sequence in ("\x1bD", "\x1bE"):
            assert strip_ansi(sequence) == ""


class TestStripAnsiNF:
    """nF (character set selection) sequences."""

    def test_charset_selection(self):
        for sequence in ("\x1b(A", "\x1b(B", "\x1b(0"):
            assert strip_ansi(sequence) == ""


class TestStripAnsiDCS:
    """DCS (Device Control String) sequences."""

    def test_dcs(self):
        device_control = "\x1bP+q\x1b\\"
        assert strip_ansi(device_control) == ""


class TestStripAnsi8BitC1:
    """C1 control characters in their 8-bit form."""

    def test_8bit_csi(self):
        for sequence in ("\x9b31m", "\x9b38;2;255;0;0m"):
            assert strip_ansi(sequence) == ""

    def test_8bit_standalone(self):
        for control in ("\x9c", "\x9d", "\x90"):
            assert strip_ansi(control) == ""


class TestStripAnsiRealWorld:
    """Contamination scenarios taken from real bug reports."""

    def test_colored_shebang(self):
        """The originally reported bug: color codes corrupting a shebang."""
        contaminated = "\x1b[32m#!/usr/bin/env python3\x1b[0m\nprint('hello')"
        expected = "#!/usr/bin/env python3\nprint('hello')"
        assert strip_ansi(contaminated) == expected

    def test_stacked_sgr(self):
        contaminated = "\x1b[1m\x1b[31m\x1b[42mhello\x1b[0m"
        assert strip_ansi(contaminated) == "hello"

    def test_ansi_mid_code(self):
        contaminated = "def foo(\x1b[33m):\x1b[0m\n return 42"
        expected = "def foo():\n return 42"
        assert strip_ansi(contaminated) == expected


class TestStripAnsiPassthrough:
    """Content without escape sequences must come back unmodified."""

    def test_plain_text(self):
        text = "normal text"
        assert strip_ansi(text) == text

    def test_empty(self):
        text = ""
        assert strip_ansi(text) == text

    def test_none(self):
        result = strip_ansi(None)
        assert result is None

    def test_whitespace_preserved(self):
        text = "line1\nline2\ttab"
        assert strip_ansi(text) == text

    def test_unicode_safe(self):
        text = "emoji 🎉 and ñ café"
        assert strip_ansi(text) == text

    def test_backslash_in_code(self):
        code = "path = 'C:\\\\Users\\\\test'"
        assert strip_ansi(code) == code

    def test_square_brackets_in_code(self):
        """Array indexing must not be mistaken for a CSI sequence."""
        code = "arr[0] = arr[31]"
        assert strip_ansi(code) == code