/ src / snapshotFormatter.test.ts
snapshotFormatter.test.ts
  1  /**
  2   * Tests for snapshotFormatter.ts: snapshot tree filtering.
  3   *
  4   * Uses sanitized excerpts from real websites (GitHub, Bilibili, Twitter)
  5   * to validate noise filtering, annotation stripping, and output quality.
  6   */
  7  
import * as fs from 'node:fs';
import * as path from 'node:path';

import { describe, it, expect } from 'vitest';
import { formatSnapshot } from './snapshotFormatter.js';
 10  
 11  // ---------------------------------------------------------------------------
 12  // Fixtures: sanitized excerpts from real aria snapshots
 13  // ---------------------------------------------------------------------------
 14  
 15  /** GitHub dashboard navigation bar (generic-heavy, refs, /url: lines) */
 16  const GITHUB_NAV = `\
 17  - generic [ref=e2]:
 18    - region
 19    - generic [ref=e3]:
 20      - link "Skip to content" [ref=e4] [cursor=pointer]:
 21        - /url: "#start-of-content"
 22      - banner "Global Navigation Menu" [ref=e8]:
 23        - generic [ref=e9]:
 24          - generic [ref=e10]:
 25            - button "Open menu" [ref=e12] [cursor=pointer]:
 26              - img [ref=e13]
 27            - link "Homepage" [ref=e15] [cursor=pointer]:
 28              - /url: /
 29              - img [ref=e16]
 30          - generic [ref=e18]:
 31            - navigation "Breadcrumbs" [ref=e19]:
 32              - list [ref=e20]:
 33                - listitem [ref=e21]:
 34                  - link "Dashboard" [ref=e22] [cursor=pointer]:
 35                    - /url: https://github.com/
 36                    - generic [ref=e23]: Dashboard
 37            - button "Search or jump to…" [ref=e26] [cursor=pointer]:
 38              - generic [ref=e27]:
 39                - generic:
 40                  - img
 41                - generic [ref=e28]:
 42                  - generic:
 43                    - text: Type
 44                    - generic: /
 45                    - text: to search`;
 46  
 47  /** GitHub repo list sidebar (repetitive structure) */
 48  const GITHUB_REPOS = `\
 49  - navigation "Repositories" [ref=e79]:
 50    - generic [ref=e80]:
 51      - generic [ref=e81]:
 52        - heading "Top repositories" [level=2] [ref=e82]
 53        - link "New" [ref=e83] [cursor=pointer]:
 54          - /url: /new
 55          - generic [ref=e84]:
 56            - generic:
 57              - img
 58            - generic [ref=e85]: New
 59      - search "Top repositories" [ref=e86]:
 60        - textbox "Find a repository…" [ref=e87]
 61      - list [ref=e88]:
 62        - listitem [ref=e89]:
 63          - generic [ref=e90]:
 64            - link "Repository" [ref=e91] [cursor=pointer]:
 65              - /url: /jackwener/twitter-cli
 66              - img "Repository" [ref=e92]
 67            - link "jackwener/twitter-cli" [ref=e94] [cursor=pointer]:
 68              - /url: /jackwener/twitter-cli
 69        - listitem [ref=e95]:
 70          - generic [ref=e96]:
 71            - link "Repository" [ref=e97] [cursor=pointer]:
 72              - /url: /jackwener/opencli
 73              - img "Repository" [ref=e98]
 74            - link "jackwener/opencli" [ref=e100] [cursor=pointer]:
 75              - /url: /jackwener/opencli`;
 76  
 77  /** Bilibili nav bar (Chinese text, multiple link categories) */
 78  const BILIBILI_NAV = `\
 79  - generic [ref=e3]:
 80    - generic [ref=e4]:
 81      - generic [ref=e5]:
 82        - list [ref=e6]:
 83          - listitem [ref=e7]:
 84            - link "首页" [ref=e8] [cursor=pointer]:
 85              - /url: //www.bilibili.com
 86              - img [ref=e9]
 87              - generic [ref=e11]: 首页
 88          - listitem [ref=e12]:
 89            - link "番剧" [ref=e13] [cursor=pointer]:
 90              - /url: //www.bilibili.com/anime/
 91          - listitem [ref=e14]:
 92            - link "直播" [ref=e15] [cursor=pointer]:
 93              - /url: //live.bilibili.com
 94        - generic [ref=e32]:
 95          - textbox "冷知识 金廷26年胜率100%" [ref=e34]
 96          - img [ref=e36] [cursor=pointer]`;
 97  
 98  /** Bilibili video card (deeply nested generic wrappers, view counts) */
 99  const BILIBILI_VIDEO = `\
100  - generic [ref=e363]:
101    - link "超酷时刻 即将到来 3.3万 40 16:24" [ref=e364] [cursor=pointer]:
102      - /url: https://www.bilibili.com/video/BV1zVw5zoEFt
103      - generic [ref=e365]:
104        - img "超酷时刻 即将到来" [ref=e368]
105        - generic:
106          - generic:
107            - generic:
108              - generic:
109                - img
110                - generic: 3.3万
111              - generic:
112                - img
113                - generic: "40"
114            - generic: 16:24
115    - generic [ref=e370]:
116      - heading "超酷时刻 即将到来" [level=3] [ref=e371]:
117        - link "超酷时刻 即将到来" [ref=e372] [cursor=pointer]:
118          - /url: https://www.bilibili.com/video/BV1zVw5zoEFt
119      - link "Tesla特斯拉中国 · 13小时前" [ref=e374] [cursor=pointer]:
120        - /url: //space.bilibili.com/491190876
121        - img [ref=e375]
122        - generic "Tesla特斯拉中国" [ref=e379]
123        - generic [ref=e380]: · 13小时前`;
124  
/**
 * Excerpt from the bottom of a Bilibili page: two top-level `generic` trees
 * whose only leaves are nameless, textless `paragraph` nodes.
 */
const BILIBILI_EMPTY = [
  '- generic [ref=e576]:',
  '  - generic:',
  '    - generic:',
  '      - generic:',
  '        - paragraph',
  '        - paragraph',
  '        - paragraph',
  '- generic [ref=e577]:',
  '  - generic:',
  '    - generic:',
  '      - generic:',
  '        - paragraph',
  '        - paragraph',
  '        - paragraph',
].join('\n');
141  
/**
 * Simulated Twitter-style timeline entry (modelled on common patterns, not a capture):
 * one article with an author link, a long text node, three action buttons,
 * followed by a separator.
 */
const TWITTER_TWEET = [
  '- main [ref=e100]:',
  '  - region "Timeline" [ref=e101]:',
  '    - article [ref=e200]:',
  '      - generic [ref=e201]:',
  '        - generic [ref=e202]:',
  '          - link "@elonmusk" [ref=e203] [cursor=pointer]:',
  '            - /url: /elonmusk',
  '            - img "@elonmusk" [ref=e204]',
  '          - generic [ref=e205]:',
  '            - generic [ref=e206]: Elon Musk',
  '            - generic [ref=e207]: @elonmusk',
  '        - generic [ref=e208]:',
  '          - generic [ref=e209]: This is a very long tweet that goes on and on about various things including technology, space, and other random topics that make this text exceed any reasonable length limit we might want to set for display purposes in a CLI interface.',
  '        - generic [ref=e210]:',
  '          - button "Reply" [ref=e211] [cursor=pointer]:',
  '            - img [ref=e212]',
  '            - generic [ref=e213]: "42"',
  '          - button "Retweet" [ref=e214] [cursor=pointer]:',
  '            - img [ref=e215]',
  '            - generic [ref=e216]: "1.2K"',
  '          - button "Like" [ref=e217] [cursor=pointer]:',
  '            - img [ref=e218]',
  '            - generic [ref=e219]: "5.3K"',
  '    - separator [ref=e300]',
].join('\n');
168  
169  // ---------------------------------------------------------------------------
170  // Tests
171  // ---------------------------------------------------------------------------
172  
173  describe('formatSnapshot', () => {
174    describe('basic behavior', () => {
175      it('returns empty string for empty/null input', () => {
176        expect(formatSnapshot('')).toBe('');
177        expect(formatSnapshot(null as unknown as string)).toBe('');
178        expect(formatSnapshot(undefined as unknown as string)).toBe('');
179      });
180  
181      it('strips [ref=...] and [cursor=...] annotations', () => {
182        const input = '- button "Click me" [ref=e42] [cursor=pointer]';
183        const result = formatSnapshot(input);
184        expect(result).not.toContain('[ref=');
185        expect(result).not.toContain('[cursor=');
186        expect(result).toContain('button "Click me"');
187      });
188  
189      it('removes /url: metadata lines', () => {
190        const input = `\
191  - link "Home" [ref=e1] [cursor=pointer]:
192    - /url: https://example.com
193    - generic [ref=e2]: Home`;
194        const result = formatSnapshot(input);
195        expect(result).not.toContain('/url:');
196        expect(result).not.toContain('https://example.com');
197      });
198  
199      it('assigns sequential [@N] refs to interactive elements', () => {
200        const input = `\
201  - button "Save" [ref=e1]
202  - link "Cancel" [ref=e2]
203  - textbox "Name" [ref=e3]`;
204        const result = formatSnapshot(input);
205        expect(result).toContain('[@1] button "Save"');
206        expect(result).toContain('[@2] link "Cancel"');
207        expect(result).toContain('[@3] textbox "Name"');
208      });
209    });
210  
211    describe('noise filtering', () => {
212      it('removes generic nodes without text', () => {
213        const input = `\
214  - generic [ref=e1]:
215    - generic [ref=e2]:
216      - button "Click" [ref=e3]`;
217        const result = formatSnapshot(input);
218        expect(result).not.toMatch(/^generic/m);
219        expect(result).toContain('button "Click"');
220      });
221  
222      it('keeps generic nodes WITH text content', () => {
223        const input = '- generic [ref=e23]: Dashboard';
224        const result = formatSnapshot(input);
225        expect(result).toContain('generic: Dashboard');
226      });
227  
228      it('removes img nodes without alt text', () => {
229        const input = `\
230  - img [ref=e13]
231  - img "Profile photo" [ref=e14]`;
232        const result = formatSnapshot(input);
233        expect(result).not.toContain('img\n');
234        expect(result).toContain('img "Profile photo"');
235      });
236  
237      it('removes separator nodes', () => {
238        const input = '- separator [ref=e304]';
239        const result = formatSnapshot(input);
240        expect(result).toBe('');
241      });
242  
243      it('removes presentation/none roles', () => {
244        const input = `\
245  - presentation [ref=e1]
246  - none [ref=e2]
247  - button "OK" [ref=e3]`;
248        const result = formatSnapshot(input);
249        expect(result).not.toContain('presentation');
250        expect(result).not.toContain('none');
251        expect(result).toContain('button "OK"');
252      });
253    });
254  
255    describe('empty container pruning', () => {
256      it('prunes containers with no visible children', () => {
257        const input = `\
258  - list [ref=e88]:
259    - listitem [ref=e89]:
260      - generic [ref=e90]:
261        - img [ref=e91]`;
262        // After filtering: generic (no text) → removed, img (no alt) → removed
263        // listitem becomes empty → pruned, list becomes empty → pruned
264        const result = formatSnapshot(input);
265        expect(result).toBe('');
266      });
267  
268      it('keeps containers with visible children', () => {
269        const input = `\
270  - list [ref=e1]:
271    - listitem [ref=e2]:
272      - link "Home" [ref=e3]`;
273        const result = formatSnapshot(input);
274        expect(result).toContain('list');
275        expect(result).toContain('listitem');
276        expect(result).toContain('link "Home"');
277      });
278    });
279  
280    describe('maxDepth option', () => {
281      it('limits output to specified depth', () => {
282        const input = `\
283  - main [ref=e1]:
284    - heading "Dashboard" [ref=e2]
285    - navigation [ref=e3]:
286      - list [ref=e4]:
287        - link "Deep link" [ref=e5]`;
288        const result = formatSnapshot(input, { maxDepth: 2 });
289        expect(result).toContain('main');
290        expect(result).toContain('heading "Dashboard"');
291        // navigation is pruned: its only child list is empty after link is excluded by maxDepth
292        expect(result).not.toContain('navigation');
293        expect(result).not.toContain('Deep link');
294      });
295  
296      it('handles maxDepth=0 correctly (was a bug)', () => {
297        const input = `\
298  - heading "Title" [ref=e1]
299    - link "Sub" [ref=e2]`;
300        const result = formatSnapshot(input, { maxDepth: 0 });
301        expect(result).toContain('heading "Title"');
302        expect(result).not.toContain('Sub');
303      });
304    });
305  
306    describe('interactive mode', () => {
307      it('keeps interactive elements and landmarks', () => {
308        const result = formatSnapshot(GITHUB_NAV, { interactive: true });
309        // Interactive elements should be present
310        expect(result).toContain('button');
311        expect(result).toContain('link');
312        // Landmarks preserved
313        expect(result).toContain('banner');
314        expect(result).toContain('navigation');
315      });
316  
317      it('filters non-interactive, non-landmark, textless nodes', () => {
318        const input = `\
319  - main [ref=e1]:
320    - generic [ref=e2]:
321      - generic [ref=e3]:
322        - button "Save" [ref=e4]
323    - generic [ref=e5]: some text content`;
324        const result = formatSnapshot(input, { interactive: true });
325        expect(result).toContain('main');
326        expect(result).toContain('button "Save"');
327        // generic with text is kept
328        expect(result).toContain('generic: some text content');
329      });
330    });
331  
332    describe('compact mode', () => {
333      it('strips bracket annotations and collapses whitespace', () => {
334        const input = '- button "Save" [ref=e1] [cursor=pointer] [level=2]';
335        const result = formatSnapshot(input, { compact: true });
336        // ref/cursor already stripped, but [level=...] should also go in compact
337        expect(result).not.toContain('[level=');
338        expect(result).toContain('button');
339      });
340    });
341  
342    describe('maxTextLength option', () => {
343      it('truncates long content lines', () => {
344        const input = '- heading "This is a very long heading that should be truncated at some point" [ref=e1]';
345        const result = formatSnapshot(input, { maxTextLength: 30 });
346        expect(result.length).toBeLessThanOrEqual(35); // some tolerance for ellipsis
347        expect(result).toContain('…');
348      });
349    });
350  
351    // ---------------------------------------------------------------------------
352    // Real-world snapshot integration tests
353    // ---------------------------------------------------------------------------
354  
355    describe('GitHub snapshot', () => {
356      it('drastically reduces nav bar output', () => {
357        const raw = GITHUB_NAV;
358        const rawLineCount = raw.split('\n').length;
359        const result = formatSnapshot(raw);
360        const resultLineCount = result.split('\n').length;
361  
362        // Should significantly reduce line count
363        expect(resultLineCount).toBeLessThan(rawLineCount);
364  
365        // Key content preserved
366        expect(result).toContain('link "Skip to content"');
367        expect(result).toContain('banner "Global Navigation Menu"');
368        expect(result).toContain('link "Dashboard"');
369        expect(result).toContain('button "Search or jump to…"');
370  
371        // Noise removed
372        expect(result).not.toContain('[ref=');
373        expect(result).not.toContain('/url:');
374      });
375  
376      it('preserves repo list structure', () => {
377        const result = formatSnapshot(GITHUB_REPOS);
378        expect(result).toContain('navigation "Repositories"');
379        expect(result).toContain('heading "Top repositories"');
380        expect(result).toContain('textbox "Find a repository…"');
381        expect(result).toContain('link "jackwener/twitter-cli"');
382        expect(result).toContain('link "jackwener/opencli"');
383        expect(result).toContain('img "Repository"');
384  
385        // No refs or urls
386        expect(result).not.toContain('[ref=');
387        expect(result).not.toContain('/url:');
388      });
389    });
390  
391    describe('Bilibili snapshot', () => {
392      it('cleans nav bar with Chinese text', () => {
393        const result = formatSnapshot(BILIBILI_NAV);
394        expect(result).toContain('link "首页"');
395        expect(result).toContain('link "番剧"');
396        expect(result).toContain('link "直播"');
397        expect(result).toContain('textbox "冷知识 金廷26年胜率100%"');
398        expect(result).not.toContain('[ref=');
399      });
400  
401      it('handles video card with deeply nested wrappers', () => {
402        const result = formatSnapshot(BILIBILI_VIDEO);
403        expect(result).toContain('link "超酷时刻 即将到来 3.3万 40 16:24"');
404        expect(result).toContain('heading "超酷时刻 即将到来"');
405        expect(result).toContain('generic "Tesla特斯拉中国"');
406  
407        // Deeply nested view count generics with text are kept
408        expect(result).toContain('3.3万');
409      });
410  
411      it('prunes empty paragraph blocks', () => {
412        const result = formatSnapshot(BILIBILI_EMPTY);
413        // All content is generic (no text) and empty paragraphs
414        // After noise filtering, everything should be pruned
415        expect(result.trim()).toBe('');
416      });
417    });
418  
419    describe('Twitter snapshot', () => {
420      it('preserves tweet structure', () => {
421        const result = formatSnapshot(TWITTER_TWEET);
422        expect(result).toContain('main');
423        expect(result).toContain('region "Timeline"');
424        expect(result).toContain('link "@elonmusk"');
425        expect(result).toContain('button "Reply"');
426        expect(result).toContain('button "Like"');
427        expect(result).not.toContain('separator');
428      });
429  
430      it('truncates long tweet text with maxTextLength', () => {
431        const result = formatSnapshot(TWITTER_TWEET, { maxTextLength: 60 });
432        // The long tweet text should be truncated
433        expect(result).toContain('…');
434        // But short elements are unaffected
435        expect(result).toContain('button "Reply"');
436      });
437  
438      it('interactive mode keeps only buttons and links', () => {
439        const result = formatSnapshot(TWITTER_TWEET, { interactive: true });
440        expect(result).toContain('link "@elonmusk"');
441        expect(result).toContain('button "Reply"');
442        expect(result).toContain('button "Retweet"');
443        expect(result).toContain('button "Like"');
444        // Structural landmarks kept
445        expect(result).toContain('main');
446        expect(result).toContain('region "Timeline"');
447        expect(result).toContain('article');
448      });
449  
450      it('combined options: interactive + maxDepth', () => {
451        // With maxDepth: 2 and interactive, depth > 2 is filtered.
452        // article at depth 2 has only generic children (noise-filtered),
453        // so article gets pruned by container pruning, which cascades up.
454        const result = formatSnapshot(TWITTER_TWEET, { interactive: true, maxDepth: 2 });
455        expect(result).toContain('main');
456        expect(result).not.toContain('button "Reply"');
457        expect(result).not.toContain('link "@elonmusk"');
458      });
459    });
460  
461    describe('reduction ratios on real data', () => {
462      it('achieves significant reduction on GitHub nav', () => {
463        const rawLines = GITHUB_NAV.split('\n').length;
464        const formatted = formatSnapshot(GITHUB_NAV);
465        const formattedLines = formatted.split('\n').filter(l => l.trim()).length;
466        // Expect at least 40% reduction
467        expect(formattedLines).toBeLessThan(rawLines * 0.6);
468      });
469  
470      it('achieves significant reduction on Bilibili video card', () => {
471        const rawLines = BILIBILI_VIDEO.split('\n').length;
472        const formatted = formatSnapshot(BILIBILI_VIDEO);
473        const formattedLines = formatted.split('\n').filter(l => l.trim()).length;
474        // Expect at least 30% reduction
475        expect(formattedLines).toBeLessThan(rawLines * 0.7);
476      });
477    });
478  
479    // ---------------------------------------------------------------------------
480    // Full-page snapshot fixture tests (loaded from __fixtures__/)
481    // ---------------------------------------------------------------------------
482  
483    describe('full-page snapshots from fixtures', () => {
484      const fs = require('node:fs');
485      const path = require('node:path');
486      const fixturesDir = path.join(__dirname, '__fixtures__');
487  
488      function loadFixture(name: string): string | null {
489        const p = path.join(fixturesDir, name);
490        if (!fs.existsSync(p)) return null;
491        return fs.readFileSync(p, 'utf-8');
492      }
493  
494      it('GitHub: significant reduction and clean output', () => {
495        const raw = loadFixture('snapshot_github.txt');
496        if (!raw) return;
497        const rawLines = raw.split('\n').length;
498        const result = formatSnapshot(raw);
499        const resultLines = result.split('\n').filter((l: string) => l.trim()).length;
500  
501        // Should achieve > 50% reduction on GitHub dashboard (heavy generic noise)
502        expect(resultLines).toBeLessThan(rawLines * 0.5);
503  
504        // No annotations remain
505        expect(result).not.toContain('[ref=');
506        expect(result).not.toContain('[cursor=');
507        expect(result).not.toContain('/url:');
508  
509        // Key content preserved
510        expect(result).toContain('link "Skip to content"');
511        expect(result).toContain('banner "Global Navigation Menu"');
512        expect(result).toContain('heading "Dashboard"');
513      });
514  
515      it('Bilibili: significant reduction and Chinese text preserved', () => {
516        const raw = loadFixture('snapshot_bilibili.txt');
517        if (!raw) return;
518        const rawLines = raw.split('\n').length;
519        const result = formatSnapshot(raw);
520        const resultLines = result.split('\n').filter((l: string) => l.trim()).length;
521  
522        // Should achieve > 40% reduction on Bilibili (lots of imgs and generics)
523        expect(resultLines).toBeLessThan(rawLines * 0.6);
524  
525        // No annotations remain
526        expect(result).not.toContain('[ref=');
527        expect(result).not.toContain('[cursor=');
528  
529        // Chinese text preserved
530        expect(result).toContain('link "首页"');
531        expect(result).toContain('link "番剧"');
532      });
533  
534      it('Twitter/X: significant reduction and tweet structure preserved', () => {
535        const raw = loadFixture('snapshot_twitter.txt');
536        if (!raw) return;
537        const rawLines = raw.split('\n').length;
538        const result = formatSnapshot(raw);
539        const resultLines = result.split('\n').filter((l: string) => l.trim()).length;
540  
541        // Should achieve > 40% reduction on Twitter/X
542        expect(resultLines).toBeLessThan(rawLines * 0.6);
543  
544        // No annotations remain
545        expect(result).not.toContain('[ref=');
546        expect(result).not.toContain('[cursor=');
547        expect(result).not.toContain('/url:');
548  
549        // Key structure preserved
550        expect(result).toContain('main');
551      });
552  
553      it('GitHub interactive mode: drastic reduction', () => {
554        const raw = loadFixture('snapshot_github.txt');
555        if (!raw) return;
556        const result = formatSnapshot(raw, { interactive: true });
557        const resultLines = result.split('\n').filter((l: string) => l.trim()).length;
558  
559        // Interactive mode should be much more aggressive
560        expect(resultLines).toBeLessThan(200);
561  
562        // Interactive elements still present
563        expect(result).toContain('button');
564        expect(result).toContain('link');
565        expect(result).toContain('textbox');
566      });
567  
568      it('Bilibili maxDepth=3: shallow view', () => {
569        const raw = loadFixture('snapshot_bilibili.txt');
570        if (!raw) return;
571        const result = formatSnapshot(raw, { maxDepth: 3 });
572        const resultLines = result.split('\n').filter((l: string) => l.trim()).length;
573  
574        // Depth-limited should be very compact
575        expect(resultLines).toBeLessThan(50);
576      });
577    });
578  });
579