html-tree.test.ts
import { describe, expect, it } from 'vitest';
import { buildHtmlTreeJs, type BuildHtmlTreeJsOptions, type HtmlTreeResult } from './html-tree.js';

/** Optional DOM properties a fake element can carry (form-control state). */
type CompoundExtras = {
  value: string;
  multiple: boolean;
  files: Array<{ name: string }>;
  options: Array<{ value: string; label?: string; text?: string; selected?: boolean; disabled?: boolean }>;
};

/** Fake element node (`nodeType` 1) exposing the properties set by `el`. */
type FakeEl = {
  nodeType: 1;
  tagName: string;
  attributes: Array<{ name: string; value: string }>;
  childNodes: Array<ChildOf>;
  getAttribute: (name: string) => string | null;
  value?: string;
  multiple?: boolean;
  files?: Array<{ name: string }>;
  options?: Array<{ value: string; label?: string; text?: string; selected?: boolean; disabled?: boolean }>;
};

/** Fake text node (`nodeType` 3). */
type FakeText = { nodeType: 3; nodeValue: string };

/** Anything that may appear in a fake element's `childNodes`. */
type ChildOf = FakeEl | FakeText;

/** The `document` surface the tests hand to the generated expression. */
type FakeDocument = {
  querySelectorAll: () => unknown[];
  documentElement: unknown;
};

/**
 * Function-evals the generated expression with `document` bound to the given
 * stub and returns whatever the expression evaluates to.
 */
function evalWithDocument(js: string, fakeDocument: FakeDocument): unknown {
  const fn = new Function('document', `return ${js};`);
  return fn(fakeDocument);
}

/**
 * The serializer runs in a page context via `page.evaluate`. In unit tests we
 * substitute `document` with a minimal stub that mirrors the DOM surface used
 * by the expression, then Function-eval the returned JS.
 *
 * @param root - Value exposed as `document.documentElement`.
 * @param selectorMatches - Value returned by every `document.querySelectorAll` call.
 * @param selector - Selector forwarded to `buildHtmlTreeJs` (`null` for none).
 * @param budgets - Remaining `buildHtmlTreeJs` options (everything except `selector`).
 * @returns The evaluated expression result, typed as `HtmlTreeResult`.
 */
function runTreeJs(
  root: unknown,
  selectorMatches: unknown[],
  selector: string | null,
  budgets: Omit<BuildHtmlTreeJsOptions, 'selector'> = {},
): HtmlTreeResult {
  const js = buildHtmlTreeJs({ selector, ...budgets });
  return evalWithDocument(js, {
    querySelectorAll: () => selectorMatches,
    documentElement: root,
  }) as HtmlTreeResult;
}

/**
 * Evaluates the generated expression against a stub whose `querySelectorAll`
 * always throws an `Error` named `SyntaxError` carrying `errorMessage`.
 *
 * @param selector - Selector forwarded to `buildHtmlTreeJs`.
 * @param errorMessage - Message of the thrown error.
 * @returns Whatever the expression evaluates to; callers narrow it themselves.
 */
function runTreeJsInvalid(selector: string, errorMessage: string): unknown {
  const js = buildHtmlTreeJs({ selector });
  return evalWithDocument(js, {
    querySelectorAll: (): never => {
      const e = new Error(errorMessage);
      e.name = 'SyntaxError';
      throw e;
    },
    documentElement: null,
  });
}

/**
 * Builds a fake element node.
 *
 * @param tag - Tag name; exposed upper-cased as `tagName`.
 * @param attrs - Attribute map; exposed both as `attributes` and via `getAttribute`.
 * @param children - Child nodes, exposed as `childNodes`.
 * @param extras - Optional form-control properties copied onto the node.
 */
function el(
  tag: string,
  attrs: Record<string, string>,
  children: Array<ChildOf>,
  extras: Partial<CompoundExtras> = {},
): FakeEl {
  return {
    nodeType: 1,
    tagName: tag.toUpperCase(),
    attributes: Object.entries(attrs).map(([name, value]) => ({ name, value })),
    childNodes: children,
    // Own-property check: `name in attrs` would also match inherited keys such
    // as "constructor" or "toString" and hand back Object.prototype members
    // instead of null for attributes that were never set.
    getAttribute: (name: string) =>
      Object.prototype.hasOwnProperty.call(attrs, name) ? (attrs[name] ?? null) : null,
    value: extras.value,
    multiple: extras.multiple,
    files: extras.files,
    options: extras.options,
  };
}

/** Builds a fake text node holding `value`. */
function txt(value: string): FakeText {
  return { nodeType: 3, nodeValue: value };
}

describe('buildHtmlTreeJs', () => {
  it('serializes a simple element into {tag, attrs, text, children}', () => {
    const root = el('div', { class: 'hero', id: 'x' }, [txt('Hello')]);
    const result = runTreeJs(root, [root], null);
    expect(result.selector).toBeNull();
    expect(result.matched).toBe(1);
    expect(result.tree).toEqual({
      tag: 'div',
      attrs: { class: 'hero', id: 'x' },
      text: 'Hello',
      children: [],
    });
  });

  it('collapses whitespace in direct text content only', () => {
    const root = el('p', {}, [
      txt(' line \n one '),
      el('span', {}, [txt('inner text')]),
      txt('\tline two\t'),
    ]);
    const result = runTreeJs(root, [root], null);
    expect(result.tree?.text).toBe('line one line two');
    expect(result.tree?.children[0].text).toBe('inner text');
  });

  it('recurses into element children and preserves their attrs', () => {
    const root = el('ul', { role: 'list' }, [
      el('li', { 'data-id': '1' }, [txt('first')]),
      el('li', { 'data-id': '2' }, [txt('second')]),
    ]);
    const result = runTreeJs(root, [root], null);
    expect(result.tree?.children).toHaveLength(2);
    expect(result.tree?.children[0]).toEqual({
      tag: 'li',
      attrs: { 'data-id': '1' },
      text: 'first',
      children: [],
    });
  });

  it('returns matched=N and serializes only the first match', () => {
    const first = el('article', { id: 'a' }, [txt('first')]);
    const second = el('article', { id: 'b' }, [txt('second')]);
    const result = runTreeJs(null, [first, second], 'article');
    expect(result.matched).toBe(2);
    expect(result.tree?.attrs.id).toBe('a');
  });

  it('returns tree=null and matched=0 when selector matches nothing', () => {
    const result = runTreeJs(null, [], '.nothing');
    expect(result.matched).toBe(0);
    expect(result.tree).toBeNull();
  });

  it('catches SyntaxError from querySelectorAll and returns {invalidSelector:true, reason}', () => {
    const result = runTreeJsInvalid('##$@@', "'##$@@' is not a valid selector") as {
      selector: string;
      invalidSelector: boolean;
      reason: string;
    };
    expect(result.invalidSelector).toBe(true);
    expect(result.selector).toBe('##$@@');
    expect(result.reason).toContain('not a valid selector');
  });

  it('omits `truncated` when no budget is hit', () => {
    const root = el('div', {}, [el('span', {}, [txt('ok')])]);
    const result = runTreeJs(root, [root], null, { depth: 5, childrenMax: 10, textMax: 100 });
    expect(result.truncated).toBeUndefined();
  });
});

describe('buildHtmlTreeJs budget knobs', () => {
  it('caps tree at `depth` and reports truncated.depth', () => {
    const deep = el('a', {}, [
      el('b', {}, [
        el('c', {}, [el('d', {}, [txt('deep')])]),
      ]),
    ]);
    // depth=1 → root + one level of children; grandchildren should be dropped.
    const result = runTreeJs(deep, [deep], null, { depth: 1 });
    expect(result.tree?.tag).toBe('a');
    expect(result.tree?.children).toHaveLength(1);
    expect(result.tree?.children[0].tag).toBe('b');
    // The "b" node had element children but we hit the depth budget before
    // recursing into them — children array is empty, truncated.depth is true.
    expect(result.tree?.children[0].children).toEqual([]);
    expect(result.truncated?.depth).toBe(true);
  });

  it('depth=0 keeps only the root', () => {
    const root = el('ul', {}, [
      el('li', {}, [txt('a')]),
      el('li', {}, [txt('b')]),
    ]);
    const result = runTreeJs(root, [root], null, { depth: 0 });
    expect(result.tree?.children).toEqual([]);
    expect(result.truncated?.depth).toBe(true);
  });

  it('caps children per node at `childrenMax` and reports children_dropped count', () => {
    const root = el('ul', {}, [
      el('li', {}, [txt('1')]),
      el('li', {}, [txt('2')]),
      el('li', {}, [txt('3')]),
      el('li', {}, [txt('4')]),
      el('li', {}, [txt('5')]),
    ]);
    const result = runTreeJs(root, [root], null, { childrenMax: 2 });
    expect(result.tree?.children).toHaveLength(2);
    expect(result.truncated?.children_dropped).toBe(3);
  });

  it('caps direct text per node at `textMax` and reports text_truncated count', () => {
    const root = el('p', {}, [
      txt('a'.repeat(50)),
      el('span', {}, [txt('b'.repeat(50))]),
    ]);
    const result = runTreeJs(root, [root], null, { textMax: 10 });
    expect(result.tree?.text).toHaveLength(10);
    expect(result.tree?.children[0].text).toHaveLength(10);
    expect(result.truncated?.text_truncated).toBe(2);
  });

  // Blocker B regression: compound contract must ride along with the
  // json tree so `browser get html --as json` surfaces the full contract
  // to agents without an extra round-trip.
  it('attaches compound info to date/file/select nodes and omits it elsewhere', () => {
    const date = el('input', { type: 'date', min: '2026-01-01' }, [], { value: '2026-04-21' });
    const file = el('input', { type: 'file', accept: 'image/*' }, [], { multiple: true, files: [{ name: 'a.png' }] });
    const sel = el('select', { name: 'country' }, [], {
      options: [
        { value: 'us', label: 'United States', selected: true },
        { value: 'ca', label: 'Canada' },
      ],
    });
    const plain = el('input', { type: 'text' }, [], { value: 'hi' });
    const root = el('form', {}, [date, file, sel, plain]);
    const result = runTreeJs(root, [root], null) as HtmlTreeResult & {
      tree: { children: Array<{ compound?: unknown }> };
    };
    expect(result.tree?.children[0].compound).toMatchObject({ control: 'date', format: 'YYYY-MM-DD', current: '2026-04-21', min: '2026-01-01' });
    expect(result.tree?.children[1].compound).toMatchObject({ control: 'file', multiple: true, current: ['a.png'], accept: 'image/*' });
    expect(result.tree?.children[2].compound).toMatchObject({ control: 'select', multiple: false, current: 'United States' });
    expect(result.tree?.children[3].compound).toBeUndefined();
  });

  it('combines budgets and reports every hit', () => {
    const root = el('ul', {}, [
      el('li', {}, [txt('x'.repeat(20)), el('em', {}, [txt('y')])]),
      el('li', {}, []),
      el('li', {}, []),
    ]);
    const result = runTreeJs(root, [root], null, { depth: 1, childrenMax: 2, textMax: 5 });
    expect(result.tree?.children).toHaveLength(2);
    expect(result.truncated?.children_dropped).toBe(1);
    expect(result.truncated?.text_truncated).toBe(1);
    expect(result.truncated?.depth).toBe(true);
  });
});