snapshotFormatter.test.ts
/**
 * Tests for snapshotFormatter.ts: snapshot tree filtering.
 *
 * Uses sanitized excerpts from real websites (GitHub, Bilibili, Twitter)
 * to validate noise filtering, annotation stripping, and output quality.
 *
 * NOTE(review): the indentation inside the fixture template literals encodes
 * tree depth (two spaces per level, children follow a node ending in ":").
 * Keep it intact when editing; the maxDepth and pruning tests depend on it.
 */

import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import { describe, it, expect } from 'vitest';
import { formatSnapshot } from './snapshotFormatter.js';

// ---------------------------------------------------------------------------
// Fixtures: sanitized excerpts from real aria snapshots
// ---------------------------------------------------------------------------

/** GitHub dashboard navigation bar (generic-heavy, refs, /url: lines) */
const GITHUB_NAV = `\
- generic [ref=e2]:
  - region
  - generic [ref=e3]:
    - link "Skip to content" [ref=e4] [cursor=pointer]:
      - /url: "#start-of-content"
    - banner "Global Navigation Menu" [ref=e8]:
      - generic [ref=e9]:
        - generic [ref=e10]:
          - button "Open menu" [ref=e12] [cursor=pointer]:
            - img [ref=e13]
          - link "Homepage" [ref=e15] [cursor=pointer]:
            - /url: /
            - img [ref=e16]
          - generic [ref=e18]:
            - navigation "Breadcrumbs" [ref=e19]:
              - list [ref=e20]:
                - listitem [ref=e21]:
                  - link "Dashboard" [ref=e22] [cursor=pointer]:
                    - /url: https://github.com/
                    - generic [ref=e23]: Dashboard
        - button "Search or jump to…" [ref=e26] [cursor=pointer]:
          - generic [ref=e27]:
            - generic:
              - img
          - generic [ref=e28]:
            - generic:
              - text: Type
              - generic: /
              - text: to search`;

/** GitHub repo list sidebar (repetitive structure) */
const GITHUB_REPOS = `\
- navigation "Repositories" [ref=e79]:
  - generic [ref=e80]:
    - generic [ref=e81]:
      - heading "Top repositories" [level=2] [ref=e82]
      - link "New" [ref=e83] [cursor=pointer]:
        - /url: /new
        - generic [ref=e84]:
          - generic:
            - img
          - generic [ref=e85]: New
    - search "Top repositories" [ref=e86]:
      - textbox "Find a repository…" [ref=e87]
    - list [ref=e88]:
      - listitem [ref=e89]:
        - generic [ref=e90]:
          - link "Repository" [ref=e91] [cursor=pointer]:
            - /url: /jackwener/twitter-cli
            - img "Repository" [ref=e92]
          - link "jackwener/twitter-cli" [ref=e94] [cursor=pointer]:
            - /url: /jackwener/twitter-cli
      - listitem [ref=e95]:
        - generic [ref=e96]:
          - link "Repository" [ref=e97] [cursor=pointer]:
            - /url: /jackwener/opencli
            - img "Repository" [ref=e98]
          - link "jackwener/opencli" [ref=e100] [cursor=pointer]:
            - /url: /jackwener/opencli`;

/** Bilibili nav bar (Chinese text, multiple link categories) */
const BILIBILI_NAV = `\
- generic [ref=e3]:
  - generic [ref=e4]:
    - generic [ref=e5]:
      - list [ref=e6]:
        - listitem [ref=e7]:
          - link "首页" [ref=e8] [cursor=pointer]:
            - /url: //www.bilibili.com
            - img [ref=e9]
            - generic [ref=e11]: 首页
        - listitem [ref=e12]:
          - link "番剧" [ref=e13] [cursor=pointer]:
            - /url: //www.bilibili.com/anime/
        - listitem [ref=e14]:
          - link "直播" [ref=e15] [cursor=pointer]:
            - /url: //live.bilibili.com
      - generic [ref=e32]:
        - textbox "冷知识 金廷26年胜率100%" [ref=e34]
        - img [ref=e36] [cursor=pointer]`;

/** Bilibili video card (deeply nested generic wrappers, view counts) */
const BILIBILI_VIDEO = `\
- generic [ref=e363]:
  - link "超酷时刻 即将到来 3.3万 40 16:24" [ref=e364] [cursor=pointer]:
    - /url: https://www.bilibili.com/video/BV1zVw5zoEFt
    - generic [ref=e365]:
      - img "超酷时刻 即将到来" [ref=e368]
      - generic:
        - generic:
          - generic:
            - generic:
              - img
              - generic: 3.3万
            - generic:
              - img
              - generic: "40"
          - generic: 16:24
  - generic [ref=e370]:
    - heading "超酷时刻 即将到来" [level=3] [ref=e371]:
      - link "超酷时刻 即将到来" [ref=e372] [cursor=pointer]:
        - /url: https://www.bilibili.com/video/BV1zVw5zoEFt
    - link "Tesla特斯拉中国 · 13小时前" [ref=e374] [cursor=pointer]:
      - /url: //space.bilibili.com/491190876
      - img [ref=e375]
      - generic "Tesla特斯拉中国" [ref=e379]
      - generic [ref=e380]: · 13小时前`;

/** Empty paragraph blocks (Bilibili bottom section) */
const BILIBILI_EMPTY = `\
- generic [ref=e576]:
  - generic:
    - generic:
      - generic:
        - paragraph
        - paragraph
        - paragraph
- generic [ref=e577]:
  - generic:
    - generic:
      - generic:
        - paragraph
        - paragraph
        - paragraph`;

/** Twitter-style feed item (simulated based on common patterns) */
const TWITTER_TWEET = `\
- main [ref=e100]:
  - region "Timeline" [ref=e101]:
    - article [ref=e200]:
      - generic [ref=e201]:
        - generic [ref=e202]:
          - link "@elonmusk" [ref=e203] [cursor=pointer]:
            - /url: /elonmusk
            - img "@elonmusk" [ref=e204]
        - generic [ref=e205]:
          - generic [ref=e206]: Elon Musk
          - generic [ref=e207]: @elonmusk
      - generic [ref=e208]:
        - generic [ref=e209]: This is a very long tweet that goes on and on about various things including technology, space, and other random topics that make this text exceed any reasonable length limit we might want to set for display purposes in a CLI interface.
      - generic [ref=e210]:
        - button "Reply" [ref=e211] [cursor=pointer]:
          - img [ref=e212]
          - generic [ref=e213]: "42"
        - button "Retweet" [ref=e214] [cursor=pointer]:
          - img [ref=e215]
          - generic [ref=e216]: "1.2K"
        - button "Like" [ref=e217] [cursor=pointer]:
          - img [ref=e218]
          - generic [ref=e219]: "5.3K"
    - separator [ref=e300]`;

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Counts the lines of `text` that contain at least one non-whitespace character. */
function countNonEmptyLines(text: string): number {
  return text.split('\n').filter((line) => line.trim()).length;
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

describe('formatSnapshot', () => {
  describe('basic behavior', () => {
    it('returns empty string for empty/null input', () => {
      expect(formatSnapshot('')).toBe('');
      // The casts deliberately bypass the type checker to exercise the runtime guard.
      expect(formatSnapshot(null as unknown as string)).toBe('');
      expect(formatSnapshot(undefined as unknown as string)).toBe('');
    });

    it('strips [ref=...] and [cursor=...] annotations', () => {
      const input = '- button "Click me" [ref=e42] [cursor=pointer]';
      const result = formatSnapshot(input);
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('[cursor=');
      expect(result).toContain('button "Click me"');
    });

    it('removes /url: metadata lines', () => {
      const input = `\
- link "Home" [ref=e1] [cursor=pointer]:
  - /url: https://example.com
  - generic [ref=e2]: Home`;
      const result = formatSnapshot(input);
      expect(result).not.toContain('/url:');
      expect(result).not.toContain('https://example.com');
    });

    it('assigns sequential [@N] refs to interactive elements', () => {
      const input = `\
- button "Save" [ref=e1]
- link "Cancel" [ref=e2]
- textbox "Name" [ref=e3]`;
      const result = formatSnapshot(input);
      expect(result).toContain('[@1] button "Save"');
      expect(result).toContain('[@2] link "Cancel"');
      expect(result).toContain('[@3] textbox "Name"');
    });
  });

  describe('noise filtering', () => {
    it('removes generic nodes without text', () => {
      const input = `\
- generic [ref=e1]:
  - generic [ref=e2]:
    - button "Click" [ref=e3]`;
      const result = formatSnapshot(input);
      expect(result).not.toMatch(/^generic/m);
      expect(result).toContain('button "Click"');
    });

    it('keeps generic nodes WITH text content', () => {
      const input = '- generic [ref=e23]: Dashboard';
      const result = formatSnapshot(input);
      expect(result).toContain('generic: Dashboard');
    });

    it('removes img nodes without alt text', () => {
      const input = `\
- img [ref=e13]
- img "Profile photo" [ref=e14]`;
      const result = formatSnapshot(input);
      expect(result).not.toContain('img\n');
      expect(result).toContain('img "Profile photo"');
    });

    it('removes separator nodes', () => {
      const input = '- separator [ref=e304]';
      const result = formatSnapshot(input);
      expect(result).toBe('');
    });

    it('removes presentation/none roles', () => {
      const input = `\
- presentation [ref=e1]
- none [ref=e2]
- button "OK" [ref=e3]`;
      const result = formatSnapshot(input);
      expect(result).not.toContain('presentation');
      expect(result).not.toContain('none');
      expect(result).toContain('button "OK"');
    });
  });

  describe('empty container pruning', () => {
    it('prunes containers with no visible children', () => {
      const input = `\
- list [ref=e88]:
  - listitem [ref=e89]:
    - generic [ref=e90]:
      - img [ref=e91]`;
      // After filtering: generic (no text) → removed, img (no alt) → removed
      // listitem becomes empty → pruned, list becomes empty → pruned
      const result = formatSnapshot(input);
      expect(result).toBe('');
    });

    it('keeps containers with visible children', () => {
      const input = `\
- list [ref=e1]:
  - listitem [ref=e2]:
    - link "Home" [ref=e3]`;
      const result = formatSnapshot(input);
      expect(result).toContain('list');
      expect(result).toContain('listitem');
      expect(result).toContain('link "Home"');
    });
  });

  describe('maxDepth option', () => {
    it('limits output to specified depth', () => {
      const input = `\
- main [ref=e1]:
  - heading "Dashboard" [ref=e2]
  - navigation [ref=e3]:
    - list [ref=e4]:
      - link "Deep link" [ref=e5]`;
      const result = formatSnapshot(input, { maxDepth: 2 });
      expect(result).toContain('main');
      expect(result).toContain('heading "Dashboard"');
      // navigation is pruned: its only child list is empty after link is excluded by maxDepth
      expect(result).not.toContain('navigation');
      expect(result).not.toContain('Deep link');
    });

    it('handles maxDepth=0 correctly (was a bug)', () => {
      // The link is indented one level so that it sits below depth 0.
      const input = `\
- heading "Title" [ref=e1]
  - link "Sub" [ref=e2]`;
      const result = formatSnapshot(input, { maxDepth: 0 });
      expect(result).toContain('heading "Title"');
      expect(result).not.toContain('Sub');
    });
  });

  describe('interactive mode', () => {
    it('keeps interactive elements and landmarks', () => {
      const result = formatSnapshot(GITHUB_NAV, { interactive: true });
      // Interactive elements should be present
      expect(result).toContain('button');
      expect(result).toContain('link');
      // Landmarks preserved
      expect(result).toContain('banner');
      expect(result).toContain('navigation');
    });

    it('filters non-interactive, non-landmark, textless nodes', () => {
      const input = `\
- main [ref=e1]:
  - generic [ref=e2]:
    - generic [ref=e3]:
      - button "Save" [ref=e4]
  - generic [ref=e5]: some text content`;
      const result = formatSnapshot(input, { interactive: true });
      expect(result).toContain('main');
      expect(result).toContain('button "Save"');
      // generic with text is kept
      expect(result).toContain('generic: some text content');
    });
  });

  describe('compact mode', () => {
    it('strips bracket annotations and collapses whitespace', () => {
      const input = '- button "Save" [ref=e1] [cursor=pointer] [level=2]';
      const result = formatSnapshot(input, { compact: true });
      // ref/cursor already stripped, but [level=...] should also go in compact
      expect(result).not.toContain('[level=');
      expect(result).toContain('button');
    });
  });

  describe('maxTextLength option', () => {
    it('truncates long content lines', () => {
      const input = '- heading "This is a very long heading that should be truncated at some point" [ref=e1]';
      const result = formatSnapshot(input, { maxTextLength: 30 });
      expect(result.length).toBeLessThanOrEqual(35); // some tolerance for ellipsis
      expect(result).toContain('…');
    });
  });

  // ---------------------------------------------------------------------------
  // Real-world snapshot integration tests
  // ---------------------------------------------------------------------------

  describe('GitHub snapshot', () => {
    it('drastically reduces nav bar output', () => {
      const raw = GITHUB_NAV;
      const rawLineCount = raw.split('\n').length;
      const result = formatSnapshot(raw);
      const resultLineCount = result.split('\n').length;

      // Should significantly reduce line count
      expect(resultLineCount).toBeLessThan(rawLineCount);

      // Key content preserved
      expect(result).toContain('link "Skip to content"');
      expect(result).toContain('banner "Global Navigation Menu"');
      expect(result).toContain('link "Dashboard"');
      expect(result).toContain('button "Search or jump to…"');

      // Noise removed
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('/url:');
    });

    it('preserves repo list structure', () => {
      const result = formatSnapshot(GITHUB_REPOS);
      expect(result).toContain('navigation "Repositories"');
      expect(result).toContain('heading "Top repositories"');
      expect(result).toContain('textbox "Find a repository…"');
      expect(result).toContain('link "jackwener/twitter-cli"');
      expect(result).toContain('link "jackwener/opencli"');
      expect(result).toContain('img "Repository"');

      // No refs or urls
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('/url:');
    });
  });

  describe('Bilibili snapshot', () => {
    it('cleans nav bar with Chinese text', () => {
      const result = formatSnapshot(BILIBILI_NAV);
      expect(result).toContain('link "首页"');
      expect(result).toContain('link "番剧"');
      expect(result).toContain('link "直播"');
      expect(result).toContain('textbox "冷知识 金廷26年胜率100%"');
      expect(result).not.toContain('[ref=');
    });

    it('handles video card with deeply nested wrappers', () => {
      const result = formatSnapshot(BILIBILI_VIDEO);
      expect(result).toContain('link "超酷时刻 即将到来 3.3万 40 16:24"');
      expect(result).toContain('heading "超酷时刻 即将到来"');
      expect(result).toContain('generic "Tesla特斯拉中国"');

      // Deeply nested view count generics with text are kept
      expect(result).toContain('3.3万');
    });

    it('prunes empty paragraph blocks', () => {
      const result = formatSnapshot(BILIBILI_EMPTY);
      // All content is generic (no text) and empty paragraphs
      // After noise filtering, everything should be pruned
      expect(result.trim()).toBe('');
    });
  });

  describe('Twitter snapshot', () => {
    it('preserves tweet structure', () => {
      const result = formatSnapshot(TWITTER_TWEET);
      expect(result).toContain('main');
      expect(result).toContain('region "Timeline"');
      expect(result).toContain('link "@elonmusk"');
      expect(result).toContain('button "Reply"');
      expect(result).toContain('button "Like"');
      expect(result).not.toContain('separator');
    });

    it('truncates long tweet text with maxTextLength', () => {
      const result = formatSnapshot(TWITTER_TWEET, { maxTextLength: 60 });
      // The long tweet text should be truncated
      expect(result).toContain('…');
      // But short elements are unaffected
      expect(result).toContain('button "Reply"');
    });

    it('interactive mode keeps only buttons and links', () => {
      const result = formatSnapshot(TWITTER_TWEET, { interactive: true });
      expect(result).toContain('link "@elonmusk"');
      expect(result).toContain('button "Reply"');
      expect(result).toContain('button "Retweet"');
      expect(result).toContain('button "Like"');
      // Structural landmarks kept
      expect(result).toContain('main');
      expect(result).toContain('region "Timeline"');
      expect(result).toContain('article');
    });

    it('combined options: interactive + maxDepth', () => {
      // With maxDepth: 2 and interactive, depth > 2 is filtered.
      // article at depth 2 has only generic children (noise-filtered),
      // so article gets pruned by container pruning, which cascades up.
      const result = formatSnapshot(TWITTER_TWEET, { interactive: true, maxDepth: 2 });
      expect(result).toContain('main');
      expect(result).not.toContain('button "Reply"');
      expect(result).not.toContain('link "@elonmusk"');
    });
  });

  describe('reduction ratios on real data', () => {
    it('achieves significant reduction on GitHub nav', () => {
      const rawLines = GITHUB_NAV.split('\n').length;
      const formattedLines = countNonEmptyLines(formatSnapshot(GITHUB_NAV));
      // Expect at least 40% reduction
      expect(formattedLines).toBeLessThan(rawLines * 0.6);
    });

    it('achieves significant reduction on Bilibili video card', () => {
      const rawLines = BILIBILI_VIDEO.split('\n').length;
      const formattedLines = countNonEmptyLines(formatSnapshot(BILIBILI_VIDEO));
      // Expect at least 30% reduction
      expect(formattedLines).toBeLessThan(rawLines * 0.7);
    });
  });

  // ---------------------------------------------------------------------------
  // Full-page snapshot fixture tests (loaded from __fixtures__/)
  // ---------------------------------------------------------------------------

  describe('full-page snapshots from fixtures', () => {
    const fixturesDir = join(__dirname, '__fixtures__');

    /** Reads a fixture file as UTF-8 text, or returns null when the file does not exist. */
    function loadFixture(name: string): string | null {
      const fixturePath = join(fixturesDir, name);
      if (!existsSync(fixturePath)) return null;
      return readFileSync(fixturePath, 'utf-8');
    }

    /**
     * Registers a test that needs a fixture file.
     *
     * When the fixture is missing or empty the test is reported as skipped,
     * instead of returning early and being counted as a pass with no
     * assertions executed.
     */
    function itWithFixture(fixtureName: string, title: string, run: (raw: string) => void): void {
      const raw = loadFixture(fixtureName);
      // The callback only runs when `raw` is a non-empty string; `?? ''` is for the type checker.
      it.skipIf(!raw)(title, () => run(raw ?? ''));
    }

    itWithFixture('snapshot_github.txt', 'GitHub: significant reduction and clean output', (raw) => {
      const rawLines = raw.split('\n').length;
      const result = formatSnapshot(raw);
      const resultLines = countNonEmptyLines(result);

      // Should achieve > 50% reduction on GitHub dashboard (heavy generic noise)
      expect(resultLines).toBeLessThan(rawLines * 0.5);

      // No annotations remain
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('[cursor=');
      expect(result).not.toContain('/url:');

      // Key content preserved
      expect(result).toContain('link "Skip to content"');
      expect(result).toContain('banner "Global Navigation Menu"');
      expect(result).toContain('heading "Dashboard"');
    });

    itWithFixture('snapshot_bilibili.txt', 'Bilibili: significant reduction and Chinese text preserved', (raw) => {
      const rawLines = raw.split('\n').length;
      const result = formatSnapshot(raw);
      const resultLines = countNonEmptyLines(result);

      // Should achieve > 40% reduction on Bilibili (lots of imgs and generics)
      expect(resultLines).toBeLessThan(rawLines * 0.6);

      // No annotations remain
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('[cursor=');

      // Chinese text preserved
      expect(result).toContain('link "首页"');
      expect(result).toContain('link "番剧"');
    });

    itWithFixture('snapshot_twitter.txt', 'Twitter/X: significant reduction and tweet structure preserved', (raw) => {
      const rawLines = raw.split('\n').length;
      const result = formatSnapshot(raw);
      const resultLines = countNonEmptyLines(result);

      // Should achieve > 40% reduction on Twitter/X
      expect(resultLines).toBeLessThan(rawLines * 0.6);

      // No annotations remain
      expect(result).not.toContain('[ref=');
      expect(result).not.toContain('[cursor=');
      expect(result).not.toContain('/url:');

      // Key structure preserved
      expect(result).toContain('main');
    });

    itWithFixture('snapshot_github.txt', 'GitHub interactive mode: drastic reduction', (raw) => {
      const result = formatSnapshot(raw, { interactive: true });
      const resultLines = countNonEmptyLines(result);

      // Interactive mode should be much more aggressive
      expect(resultLines).toBeLessThan(200);

      // Interactive elements still present
      expect(result).toContain('button');
      expect(result).toContain('link');
      expect(result).toContain('textbox');
    });

    itWithFixture('snapshot_bilibili.txt', 'Bilibili maxDepth=3: shallow view', (raw) => {
      const result = formatSnapshot(raw, { maxDepth: 3 });
      const resultLines = countNonEmptyLines(result);

      // Depth-limited should be very compact
      expect(resultLines).toBeLessThan(50);
    });
  });
});