/ tests / tools / test_ansi_strip.py
test_ansi_strip.py
  1  """Comprehensive tests for ANSI escape sequence stripping (ECMA-48).
  2  
  3  The strip_ansi function in tools/ansi_strip.py is the source-level fix for
  4  ANSI codes leaking into the model's context via terminal/execute_code output.
  5  It must strip ALL terminal escape sequences while preserving legitimate text.
  6  """
  7  
  8  from tools.ansi_strip import strip_ansi
  9  
 10  
 11  class TestStripAnsiBasicSGR:
 12      """Select Graphic Rendition — the most common ANSI sequences."""
 13  
 14      def test_reset(self):
 15          assert strip_ansi("\x1b[0m") == ""
 16  
 17      def test_color(self):
 18          assert strip_ansi("\x1b[31;1m") == ""
 19  
 20      def test_truecolor_semicolon(self):
 21          assert strip_ansi("\x1b[38;2;255;0;0m") == ""
 22  
 23      def test_truecolor_colon_separated(self):
 24          """Modern terminals use colon-separated SGR params."""
 25          assert strip_ansi("\x1b[38:2:255:0:0m") == ""
 26          assert strip_ansi("\x1b[48:2:0:255:0m") == ""
 27  
 28  
 29  class TestStripAnsiCSIPrivateMode:
 30      """CSI sequences with ? prefix (DEC private modes)."""
 31  
 32      def test_cursor_show_hide(self):
 33          assert strip_ansi("\x1b[?25h") == ""
 34          assert strip_ansi("\x1b[?25l") == ""
 35  
 36      def test_alt_screen(self):
 37          assert strip_ansi("\x1b[?1049h") == ""
 38          assert strip_ansi("\x1b[?1049l") == ""
 39  
 40      def test_bracketed_paste(self):
 41          assert strip_ansi("\x1b[?2004h") == ""
 42  
 43  
 44  class TestStripAnsiCSIIntermediate:
 45      """CSI sequences with intermediate bytes (space, etc.)."""
 46  
 47      def test_cursor_shape(self):
 48          assert strip_ansi("\x1b[0 q") == ""
 49          assert strip_ansi("\x1b[2 q") == ""
 50          assert strip_ansi("\x1b[6 q") == ""
 51  
 52  
 53  class TestStripAnsiOSC:
 54      """Operating System Command sequences."""
 55  
 56      def test_bel_terminator(self):
 57          assert strip_ansi("\x1b]0;title\x07") == ""
 58  
 59      def test_st_terminator(self):
 60          assert strip_ansi("\x1b]0;title\x1b\\") == ""
 61  
 62      def test_hyperlink_preserves_text(self):
 63          assert strip_ansi(
 64              "\x1b]8;;https://example.com\x1b\\click\x1b]8;;\x1b\\"
 65          ) == "click"
 66  
 67  
 68  class TestStripAnsiDECPrivate:
 69      """DEC private / Fp escape sequences."""
 70  
 71      def test_save_restore_cursor(self):
 72          assert strip_ansi("\x1b7") == ""
 73          assert strip_ansi("\x1b8") == ""
 74  
 75      def test_keypad_modes(self):
 76          assert strip_ansi("\x1b=") == ""
 77          assert strip_ansi("\x1b>") == ""
 78  
 79  
 80  class TestStripAnsiFe:
 81      """Fe (C1 as 7-bit) escape sequences."""
 82  
 83      def test_reverse_index(self):
 84          assert strip_ansi("\x1bM") == ""
 85  
 86      def test_reset_terminal(self):
 87          assert strip_ansi("\x1bc") == ""
 88  
 89      def test_index_and_newline(self):
 90          assert strip_ansi("\x1bD") == ""
 91          assert strip_ansi("\x1bE") == ""
 92  
 93  
 94  class TestStripAnsiNF:
 95      """nF (character set selection) sequences."""
 96  
 97      def test_charset_selection(self):
 98          assert strip_ansi("\x1b(A") == ""
 99          assert strip_ansi("\x1b(B") == ""
100          assert strip_ansi("\x1b(0") == ""
101  
102  
class TestStripAnsiDCS:
    """DCS (Device Control String) sequences: ``ESC P ... ESC \\``."""

    def test_dcs(self):
        """A DCS string with payload ``+q`` and an ST terminator is removed."""
        stripped = strip_ansi("\x1bP+q\x1b\\")
        assert stripped == ""
108  
109  
class TestStripAnsi8BitC1:
    """Single-byte (8-bit) C1 controls in the 0x80-0x9F range."""

    def test_8bit_csi(self):
        """Sequences introduced by 0x9B instead of ``ESC [`` are stripped."""
        csi_sequences = (
            "\x9b31m",
            "\x9b38;2;255;0;0m",
        )
        for sequence in csi_sequences:
            assert strip_ansi(sequence) == ""

    def test_8bit_standalone(self):
        """Bare 0x9C, 0x9D and 0x90 characters are stripped."""
        for control in ("\x9c", "\x9d", "\x90"):
            assert strip_ansi(control) == ""
121  
122  
class TestStripAnsiRealWorld:
    """Contamination scenarios taken from bug reports.

    Unlike the per-family classes, these inputs mix escape sequences with
    real text and check that the text survives exactly.
    """

    def test_colored_shebang(self):
        """The original reported bug: shebang corrupted by color codes."""
        contaminated = "\x1b[32m#!/usr/bin/env python3\x1b[0m\nprint('hello')"
        expected = "#!/usr/bin/env python3\nprint('hello')"
        assert strip_ansi(contaminated) == expected

    def test_stacked_sgr(self):
        """Several back-to-back SGR sequences around a word are all removed."""
        contaminated = "\x1b[1m\x1b[31m\x1b[42mhello\x1b[0m"
        expected = "hello"
        assert strip_ansi(contaminated) == expected

    def test_ansi_mid_code(self):
        """Sequences embedded inside a line of code leave the code intact."""
        contaminated = "def foo(\x1b[33m):\x1b[0m\n    return 42"
        expected = "def foo():\n    return 42"
        assert strip_ansi(contaminated) == expected
141  
142  
class TestStripAnsiPassthrough:
    """Inputs without escape sequences must come back unchanged."""

    def test_plain_text(self):
        """Ordinary ASCII text is returned as-is."""
        text = "normal text"
        assert strip_ansi(text) == "normal text"

    def test_empty(self):
        """The empty string maps to the empty string."""
        result = strip_ansi("")
        assert result == ""

    def test_none(self):
        """``None`` is passed through as ``None``."""
        result = strip_ansi(None)
        assert result is None

    def test_whitespace_preserved(self):
        """Newlines and tabs are not treated as strippable controls."""
        text = "line1\nline2\ttab"
        assert strip_ansi(text) == "line1\nline2\ttab"

    def test_unicode_safe(self):
        """Non-ASCII characters (emoji, accented letters) are untouched."""
        text = "emoji 🎉 and ñ café"
        assert strip_ansi(text) == "emoji 🎉 and ñ café"

    def test_backslash_in_code(self):
        """Literal backslashes in source text are not mistaken for ST."""
        code = "path = 'C:\\\\Users\\\\test'"
        result = strip_ansi(code)
        assert result == code

    def test_square_brackets_in_code(self):
        """Array indexing must not be confused with CSI."""
        code = "arr[0] = arr[31]"
        result = strip_ansi(code)
        assert result == code