/ src / browser / html-tree.test.ts
html-tree.test.ts
  1  import { describe, expect, it } from 'vitest';
  2  import { buildHtmlTreeJs, type BuildHtmlTreeJsOptions, type HtmlTreeResult } from './html-tree.js';
  3  
  4  /**
  5   * The serializer runs in a page context via `page.evaluate`. In unit tests we
  6   * substitute `document` with a minimal stub that mirrors the DOM surface used
  7   * by the expression, then Function-eval the returned JS.
  8   */
  9  function runTreeJs(
 10      root: unknown,
 11      selectorMatches: unknown[],
 12      selector: string | null,
 13      budgets: Omit<BuildHtmlTreeJsOptions, 'selector'> = {},
 14  ): HtmlTreeResult {
 15      const js = buildHtmlTreeJs({ selector, ...budgets });
 16      const fakeDocument = {
 17          querySelectorAll: () => selectorMatches,
 18          documentElement: root,
 19      };
 20      const fn = new Function('document', `return ${js};`);
 21      return fn(fakeDocument) as HtmlTreeResult;
 22  }
 23  
 24  function runTreeJsInvalid(selector: string, errorMessage: string): unknown {
 25      const js = buildHtmlTreeJs({ selector });
 26      const fakeDocument = {
 27          querySelectorAll: () => { const e = new Error(errorMessage); e.name = 'SyntaxError'; throw e; },
 28          documentElement: null,
 29      };
 30      const fn = new Function('document', `return ${js};`);
 31      return fn(fakeDocument);
 32  }
 33  
 34  function el(tag: string, attrs: Record<string, string>, children: Array<ChildOf>, extras: Partial<CompoundExtras> = {}): FakeEl {
 35      return {
 36          nodeType: 1,
 37          tagName: tag.toUpperCase(),
 38          attributes: Object.entries(attrs).map(([name, value]) => ({ name, value })),
 39          childNodes: children,
 40          getAttribute: (name: string) => (name in attrs ? attrs[name]! : null),
 41          value: extras.value,
 42          multiple: extras.multiple,
 43          files: extras.files,
 44          options: extras.options,
 45      };
 46  }
 47  
 48  function txt(value: string): FakeText { return { nodeType: 3, nodeValue: value }; }
 49  
 50  type CompoundExtras = {
 51      value: string;
 52      multiple: boolean;
 53      files: Array<{ name: string }>;
 54      options: Array<{ value: string; label?: string; text?: string; selected?: boolean; disabled?: boolean }>;
 55  };
 56  type FakeEl = {
 57      nodeType: 1;
 58      tagName: string;
 59      attributes: Array<{ name: string; value: string }>;
 60      childNodes: Array<ChildOf>;
 61      getAttribute: (name: string) => string | null;
 62      value?: string;
 63      multiple?: boolean;
 64      files?: Array<{ name: string }>;
 65      options?: Array<{ value: string; label?: string; text?: string; selected?: boolean; disabled?: boolean }>;
 66  };
 67  type FakeText = { nodeType: 3; nodeValue: string };
 68  type ChildOf = FakeEl | FakeText;
 69  
 70  describe('buildHtmlTreeJs', () => {
 71      it('serializes a simple element into {tag, attrs, text, children}', () => {
 72          const root = el('div', { class: 'hero', id: 'x' }, [txt('Hello')]);
 73          const result = runTreeJs(root, [root], null);
 74          expect(result.selector).toBeNull();
 75          expect(result.matched).toBe(1);
 76          expect(result.tree).toEqual({
 77              tag: 'div',
 78              attrs: { class: 'hero', id: 'x' },
 79              text: 'Hello',
 80              children: [],
 81          });
 82      });
 83  
 84      it('collapses whitespace in direct text content only', () => {
 85          const root = el('p', {}, [
 86              txt('  line  \n  one  '),
 87              el('span', {}, [txt('inner text')]),
 88              txt('\tline two\t'),
 89          ]);
 90          const result = runTreeJs(root, [root], null);
 91          expect(result.tree?.text).toBe('line one line two');
 92          expect(result.tree?.children[0].text).toBe('inner text');
 93      });
 94  
 95      it('recurses into element children and preserves their attrs', () => {
 96          const root = el('ul', { role: 'list' }, [
 97              el('li', { 'data-id': '1' }, [txt('first')]),
 98              el('li', { 'data-id': '2' }, [txt('second')]),
 99          ]);
100          const result = runTreeJs(root, [root], null);
101          expect(result.tree?.children).toHaveLength(2);
102          expect(result.tree?.children[0]).toEqual({
103              tag: 'li',
104              attrs: { 'data-id': '1' },
105              text: 'first',
106              children: [],
107          });
108      });
109  
110      it('returns matched=N and serializes only the first match', () => {
111          const first = el('article', { id: 'a' }, [txt('first')]);
112          const second = el('article', { id: 'b' }, [txt('second')]);
113          const result = runTreeJs(null, [first, second], 'article');
114          expect(result.matched).toBe(2);
115          expect(result.tree?.attrs.id).toBe('a');
116      });
117  
118      it('returns tree=null and matched=0 when selector matches nothing', () => {
119          const result = runTreeJs(null, [], '.nothing');
120          expect(result.matched).toBe(0);
121          expect(result.tree).toBeNull();
122      });
123  
124      it('catches SyntaxError from querySelectorAll and returns {invalidSelector:true, reason}', () => {
125          const result = runTreeJsInvalid('##$@@', "'##$@@' is not a valid selector") as {
126              selector: string;
127              invalidSelector: boolean;
128              reason: string;
129          };
130          expect(result.invalidSelector).toBe(true);
131          expect(result.selector).toBe('##$@@');
132          expect(result.reason).toContain('not a valid selector');
133      });
134  
135      it('omits `truncated` when no budget is hit', () => {
136          const root = el('div', {}, [el('span', {}, [txt('ok')])]);
137          const result = runTreeJs(root, [root], null, { depth: 5, childrenMax: 10, textMax: 100 });
138          expect(result.truncated).toBeUndefined();
139      });
140  });
141  
// Budget options (`depth`, `childrenMax`, `textMax`) and the `truncated`
// report they produce, plus the compound-control regression check.
describe('buildHtmlTreeJs budget knobs', () => {
    it('caps tree at `depth` and reports truncated.depth', () => {
        // Chain a > b > c > d, built from the innermost node outwards.
        const d = el('d', {}, [txt('deep')]);
        const c = el('c', {}, [d]);
        const b = el('b', {}, [c]);
        const a = el('a', {}, [b]);

        // depth=1 keeps the root and one level of children; grandchildren are dropped.
        const { tree, truncated } = runTreeJs(a, [a], null, { depth: 1 });

        expect(tree?.tag).toBe('a');
        expect(tree?.children).toHaveLength(1);
        expect(tree?.children[0].tag).toBe('b');
        // "b" has element children, but the depth budget is reached before
        // descending into them: its children list is empty and the hit is flagged.
        expect(tree?.children[0].children).toEqual([]);
        expect(truncated?.depth).toBe(true);
    });

    it('depth=0 keeps only the root', () => {
        const items = ['a', 'b'].map((label) => el('li', {}, [txt(label)]));
        const list = el('ul', {}, items);

        const { tree, truncated } = runTreeJs(list, [list], null, { depth: 0 });

        expect(tree?.children).toEqual([]);
        expect(truncated?.depth).toBe(true);
    });

    it('caps children per node at `childrenMax` and reports children_dropped count', () => {
        const items = ['1', '2', '3', '4', '5'].map((label) => el('li', {}, [txt(label)]));
        const list = el('ul', {}, items);

        const { tree, truncated } = runTreeJs(list, [list], null, { childrenMax: 2 });

        // Five children, two kept, three reported as dropped.
        expect(tree?.children).toHaveLength(2);
        expect(truncated?.children_dropped).toBe(3);
    });

    it('caps direct text per node at `textMax` and reports text_truncated count', () => {
        const ownText = txt('a'.repeat(50));
        const nested = el('span', {}, [txt('b'.repeat(50))]);
        const paragraph = el('p', {}, [ownText, nested]);

        const { tree, truncated } = runTreeJs(paragraph, [paragraph], null, { textMax: 10 });

        // Both the paragraph and the span exceed the limit, so two truncations are counted.
        expect(tree?.text).toHaveLength(10);
        expect(tree?.children[0].text).toHaveLength(10);
        expect(truncated?.text_truncated).toBe(2);
    });

    // Regression guard for "Blocker B": the compound contract has to travel
    // with the json tree, so `browser get html --as json` gives agents the
    // full contract without an extra round-trip.
    it('attaches compound info to date/file/select nodes and omits it elsewhere', () => {
        type ResultWithCompound = HtmlTreeResult & {
            tree: { children: Array<{ compound?: unknown }> };
        };

        const dateInput = el('input', { type: 'date', min: '2026-01-01' }, [], { value: '2026-04-21' });
        const fileInput = el('input', { type: 'file', accept: 'image/*' }, [], {
            multiple: true,
            files: [{ name: 'a.png' }],
        });
        const countrySelect = el('select', { name: 'country' }, [], {
            options: [
                { value: 'us', label: 'United States', selected: true },
                { value: 'ca', label: 'Canada' },
            ],
        });
        const textInput = el('input', { type: 'text' }, [], { value: 'hi' });
        const form = el('form', {}, [dateInput, fileInput, countrySelect, textInput]);

        const { tree } = runTreeJs(form, [form], null) as ResultWithCompound;

        expect(tree?.children[0].compound).toMatchObject({
            control: 'date',
            format: 'YYYY-MM-DD',
            current: '2026-04-21',
            min: '2026-01-01',
        });
        expect(tree?.children[1].compound).toMatchObject({
            control: 'file',
            multiple: true,
            current: ['a.png'],
            accept: 'image/*',
        });
        expect(tree?.children[2].compound).toMatchObject({
            control: 'select',
            multiple: false,
            current: 'United States',
        });
        // A plain text input carries no compound info.
        expect(tree?.children[3].compound).toBeUndefined();
    });

    it('combines budgets and reports every hit', () => {
        const crowded = el('li', {}, [txt('x'.repeat(20)), el('em', {}, [txt('y')])]);
        const list = el('ul', {}, [crowded, el('li', {}, []), el('li', {}, [])]);
        const tightBudgets = { depth: 1, childrenMax: 2, textMax: 5 };

        const { tree, truncated } = runTreeJs(list, [list], null, tightBudgets);

        expect(tree?.children).toHaveLength(2);
        expect(truncated?.children_dropped).toBe(1);
        expect(truncated?.text_truncated).toBe(1);
        expect(truncated?.depth).toBe(true);
    });
});