/ src / browser / find.ts
find.ts
/**
 * `browser find --css <sel>` — structured CSS query.
 *
 * Returns every match of a selector as a JSON envelope agents can read
 * without parsing free-text snapshot output. Each entry carries two
 * identifiers — a numeric `ref` (matching the snapshot contract) and a
 * stable 0-based `nth` — so the agent can act on a specific result via
 * either path:
 *
 *   browser click <ref>              // when ref is numeric
 *   browser click "<sel>" --nth <n>  // always works
 *
 * Refs are *allocated on the spot* for matched elements that were not
 * tagged by a prior snapshot: `data-opencli-ref` is set on the element
 * and a fingerprint is written into `window.__opencli_ref_identity`
 * (same shape the snapshot uses). That makes `find` a first-class entry
 * point to the ref system — agents can skip running `browser state`
 * when they already know the selector.
 *
 * Attributes are whitelisted to keep output small and high-signal.
 * Invisible elements are still returned so agents can reason about
 * offscreen vs truly-missing targets.
 *
 * When a matched element is a compound form control (date-like input,
 * select, file input), the entry gains a `compound` field with the
 * rich view from `compound.ts`. This is what kills the three biggest
 * agent-fail modes on form pages (wrong date format, guessed options,
 * re-uploaded files) without forcing agents to probe further.
 */
 30  
 31  import { COMPOUND_INFO_JS, type CompoundInfo } from './compound.js';
 32  
 33  /** Whitelist of attributes surfaced per entry. Keep small; agents do not need full DOM dumps. */
 34  export const FIND_ATTR_WHITELIST = [
 35    'id',
 36    'class',
 37    'name',
 38    'type',
 39    'placeholder',
 40    'aria-label',
 41    'title',
 42    'href',
 43    'value',
 44    'role',
 45    'data-testid',
 46  ] as const;
 47  
 48  export interface FindEntry {
 49    /** Zero-based position within the match set — pair with `--nth` on downstream commands. */
 50    nth: number;
 51    /**
 52     * Numeric data-opencli-ref. Find assigns one if the element was not
 53     * tagged by a prior snapshot, so downstream `browser click <ref>` works
 54     * directly off the find output without requiring `browser state` first.
 55     */
 56    ref: number;
 57    tag: string;
 58    role: string;
 59    text: string;
 60    attrs: Record<string, string>;
 61    visible: boolean;
 62    /**
 63     * Rich view for date / time / datetime-local / month / week / select /
 64     * file inputs. Omitted (undefined) for all other element types. See
 65     * `compound.ts` for the shape.
 66     */
 67    compound?: CompoundInfo;
 68  }
 69  
 70  export interface FindResult {
 71    matches_n: number;
 72    entries: FindEntry[];
 73  }
 74  
 75  export interface FindError {
 76    error: {
 77      code: 'invalid_selector' | 'selector_not_found';
 78      message: string;
 79      hint?: string;
 80    };
 81  }
 82  
 83  export interface FindOptions {
 84    /** Max entries returned. Default 50 — enough to pick from without flooding context. */
 85    limit?: number;
 86    /** Max chars of trimmed text per entry. Default 120. */
 87    textMax?: number;
 88  }
 89  
 90  /**
 91   * Build the browser-side JS that performs the CSS query and emits the
 92   * FindResult (or FindError) envelope. Evaluated inside `page.evaluate`.
 93   */
 94  export function buildFindJs(selector: string, opts: FindOptions = {}): string {
 95    const safeSel = JSON.stringify(selector);
 96    const limit = opts.limit ?? 50;
 97    const textMax = opts.textMax ?? 120;
 98    const whitelist = JSON.stringify(FIND_ATTR_WHITELIST);
 99  
100    return `
101      (() => {
102        const sel = ${safeSel};
103        const LIMIT = ${limit};
104        const TEXT_MAX = ${textMax};
105        const ATTR_WHITELIST = ${whitelist};
106  
107        ${COMPOUND_INFO_JS}
108  
109        let matches;
110        try {
111          matches = document.querySelectorAll(sel);
112        } catch (e) {
113          return {
114            error: {
115              code: 'invalid_selector',
116              message: 'Invalid CSS selector: ' + sel + ' (' + ((e && e.message) || String(e)) + ')',
117              hint: 'Check the selector syntax.',
118            },
119          };
120        }
121  
122        if (matches.length === 0) {
123          return {
124            error: {
125              code: 'selector_not_found',
126              message: 'CSS selector ' + sel + ' matched 0 elements',
127              hint: 'Use browser state to inspect the page, or try a less specific selector.',
128            },
129          };
130        }
131  
132        function pickAttrs(el) {
133          const out = {};
134          for (const key of ATTR_WHITELIST) {
135            const v = el.getAttribute(key);
136            if (v != null && v !== '') out[key] = v;
137          }
138          return out;
139        }
140  
141        function isVisible(el) {
142          const rect = el.getBoundingClientRect();
143          if (rect.width === 0 && rect.height === 0) return false;
144          try {
145            const style = getComputedStyle(el);
146            if (style.display === 'none' || style.visibility === 'hidden') return false;
147            if (parseFloat(style.opacity || '1') === 0) return false;
148          } catch (_) {}
149          return true;
150        }
151  
152        // Ref allocation: reuse \`window.__opencli_ref_identity\` (the same map
153        // snapshot populates) as the source of truth. For matched elements that
154        // don't already carry a \`data-opencli-ref\`, assign the next free numeric
155        // ref and write the fingerprint so the target resolver can verify it on
156        // downstream click/type/get calls.
157        const identity = (window.__opencli_ref_identity = window.__opencli_ref_identity || {});
158        let maxRef = 0;
159        for (const k in identity) {
160          const n = parseInt(k, 10);
161          if (!isNaN(n) && n > maxRef) maxRef = n;
162        }
163        // Also walk any \`data-opencli-ref\` already in the DOM in case the identity
164        // map was cleared but annotations remain (e.g. soft navigation without a
165        // fresh snapshot). Guarantees allocated refs don't collide.
166        try {
167          const tagged = document.querySelectorAll('[data-opencli-ref]');
168          for (let t = 0; t < tagged.length; t++) {
169            const v = tagged[t].getAttribute('data-opencli-ref');
170            const n = v != null && /^\\d+$/.test(v) ? parseInt(v, 10) : NaN;
171            if (!isNaN(n) && n > maxRef) maxRef = n;
172          }
173        } catch (_) {}
174  
175        function fingerprintOf(el) {
176          return {
177            tag: el.tagName.toLowerCase(),
178            role: el.getAttribute('role') || '',
179            text: (el.textContent || '').trim().slice(0, 30),
180            ariaLabel: el.getAttribute('aria-label') || '',
181            id: el.id || '',
182            testId: el.getAttribute('data-testid') || el.getAttribute('data-test') || '',
183          };
184        }
185  
186        const take = Math.min(matches.length, LIMIT);
187        const entries = [];
188        for (let i = 0; i < take; i++) {
189          const el = matches[i];
190          const refAttr = el.getAttribute('data-opencli-ref');
191          let refNum = refAttr != null && /^\\d+$/.test(refAttr) ? parseInt(refAttr, 10) : null;
192          if (refNum === null) {
193            refNum = ++maxRef;
194            try { el.setAttribute('data-opencli-ref', '' + refNum); } catch (_) {}
195            identity['' + refNum] = fingerprintOf(el);
196          } else if (!identity['' + refNum]) {
197            // Ref annotation survived but identity map was cleared — repopulate so the
198            // target resolver's fingerprint check passes on downstream calls.
199            identity['' + refNum] = fingerprintOf(el);
200          }
201          const text = (el.textContent || '').trim();
202          const entry = {
203            nth: i,
204            ref: refNum,
205            tag: el.tagName.toLowerCase(),
206            role: el.getAttribute('role') || '',
207            text: text.length > TEXT_MAX ? text.slice(0, TEXT_MAX) : text,
208            attrs: pickAttrs(el),
209            visible: isVisible(el),
210          };
211          const compound = compoundInfoOf(el);
212          if (compound) entry.compound = compound;
213          entries.push(entry);
214        }
215  
216        return {
217          matches_n: matches.length,
218          entries,
219        };
220      })()
221    `;
222  }
223  
/**
 * Type guard separating the error envelope from a successful result.
 *
 * Only checks that `result` is a non-null object with an `error` key; the
 * shape of `error` itself is not validated.
 *
 * @param result Value returned from evaluating the generated find script.
 * @returns `true` when `result` carries an `error` property.
 */
export function isFindError(result: unknown): result is FindError {
  return typeof result === 'object' && result !== null && 'error' in result;
}
226  }