find.ts
/**
 * `browser find --css <sel>` — structured CSS query.
 *
 * Returns every match of a selector as a JSON envelope agents can read
 * without parsing free-text snapshot output. Each entry carries two
 * identifiers — a numeric `ref` (matching the snapshot contract) and a
 * stable 0-based `nth` — so the agent can act on a specific result via
 * either path:
 *
 *   browser click <ref>                // when ref is numeric
 *   browser click "<sel>" --nth <n>    // always works
 *
 * Refs are *allocated on the spot* for matched elements that were not
 * tagged by a prior snapshot: `data-opencli-ref` is set on the element
 * and a fingerprint is written into `window.__opencli_ref_identity`
 * (same shape the snapshot uses). That makes `find` a first-class entry
 * point to the ref system — agents can skip running `browser state`
 * when they already know the selector.
 *
 * Attributes are whitelisted to keep output small and high-signal.
 * Invisible elements are still returned so agents can reason about
 * offscreen vs truly-missing targets.
 *
 * When a matched element is a compound form control (date-like input,
 * select, file input), the entry gains a `compound` field with the
 * rich view from `compound.ts`. This is what kills the three biggest
 * agent-fail modes on form pages (wrong date format, guessed options,
 * re-uploaded files) without forcing agents to probe further.
 */

import { COMPOUND_INFO_JS, type CompoundInfo } from './compound.js';

/** Whitelist of attributes surfaced per entry. Keep small; agents do not need full DOM dumps. */
export const FIND_ATTR_WHITELIST = [
  'id',
  'class',
  'name',
  'type',
  'placeholder',
  'aria-label',
  'title',
  'href',
  'value',
  'role',
  'data-testid',
] as const;

export interface FindEntry {
  /** Zero-based position within the match set — pair with `--nth` on downstream commands. */
  nth: number;
  /**
   * Numeric data-opencli-ref. Find assigns one if the element was not
   * tagged by a prior snapshot, so downstream `browser click <ref>` works
   * directly off the find output without requiring `browser state` first.
   */
  ref: number;
  tag: string;
  role: string;
  text: string;
  attrs: Record<string, string>;
  visible: boolean;
  /**
   * Rich view for date / time / datetime-local / month / week / select /
   * file inputs. Omitted (undefined) for all other element types. See
   * `compound.ts` for the shape.
   */
  compound?: CompoundInfo;
}

export interface FindResult {
  matches_n: number;
  entries: FindEntry[];
}

export interface FindError {
  error: {
    code: 'invalid_selector' | 'selector_not_found';
    message: string;
    hint?: string;
  };
}

export interface FindOptions {
  /** Max entries returned. Default 50 — enough to pick from without flooding context. */
  limit?: number;
  /** Max chars of trimmed text per entry. Default 120. */
  textMax?: number;
}

/** Default for `FindOptions.limit`. */
const DEFAULT_FIND_LIMIT = 50;
/** Default for `FindOptions.textMax`. */
const DEFAULT_FIND_TEXT_MAX = 120;

/**
 * Coerce a caller-supplied count into something safe to splice into the
 * generated script as a numeric literal.
 *
 * - `null` / `undefined`, blank strings, non-numeric values, `NaN`, and
 *   negative numbers fall back to `fallback`.
 * - Numeric strings are converted (option values may arrive untyped from
 *   CLI plumbing even though the declared type is `number`).
 * - Fractions are floored so the value is a true upper bound.
 * - `Infinity` is kept as-is; it interpolates to the valid JS literal
 *   `Infinity` and means "no cap".
 */
function normalizeCount(value: unknown, fallback: number): number {
  if (value == null) return fallback;
  let n: number;
  if (typeof value === 'number') {
    n = value;
  } else if (typeof value === 'string' && value.trim() !== '') {
    n = Number(value);
  } else {
    return fallback;
  }
  if (Number.isNaN(n) || n < 0) return fallback;
  return n === Infinity ? n : Math.floor(n);
}

/**
 * Build the browser-side JS that performs the CSS query and emits the
 * FindResult (or FindError) envelope. Evaluated inside `page.evaluate`.
 *
 * The generated script:
 *  - returns an `invalid_selector` error if `querySelectorAll` throws,
 *  - returns a `selector_not_found` error if nothing matches,
 *  - otherwise walks up to `limit` matches, reusing an existing numeric
 *    `data-opencli-ref` or allocating the next free one (and recording a
 *    fingerprint in `window.__opencli_ref_identity`), and emits one entry
 *    per element. `matches_n` always reports the untruncated match count.
 *
 * @param selector CSS selector to query. Embedded via `JSON.stringify`, so
 *   it reaches the page as an escaped string literal rather than raw code.
 * @param opts Output caps. Invalid values (NaN, negative, non-numeric)
 *   fall back to the defaults; fractional values are floored.
 * @returns Source text of an IIFE expression that evaluates to the envelope.
 */
export function buildFindJs(selector: string, opts: FindOptions = {}): string {
  const safeSel = JSON.stringify(selector);
  // Both values are interpolated verbatim into the script below, so they
  // must be well-formed non-negative numbers before they get there.
  const limit = normalizeCount(opts.limit, DEFAULT_FIND_LIMIT);
  const textMax = normalizeCount(opts.textMax, DEFAULT_FIND_TEXT_MAX);
  const whitelist = JSON.stringify(FIND_ATTR_WHITELIST);

  return `
    (() => {
      const sel = ${safeSel};
      const LIMIT = ${limit};
      const TEXT_MAX = ${textMax};
      const ATTR_WHITELIST = ${whitelist};

      ${COMPOUND_INFO_JS}

      let matches;
      try {
        matches = document.querySelectorAll(sel);
      } catch (e) {
        return {
          error: {
            code: 'invalid_selector',
            message: 'Invalid CSS selector: ' + sel + ' (' + ((e && e.message) || String(e)) + ')',
            hint: 'Check the selector syntax.',
          },
        };
      }

      if (matches.length === 0) {
        return {
          error: {
            code: 'selector_not_found',
            message: 'CSS selector ' + sel + ' matched 0 elements',
            hint: 'Use browser state to inspect the page, or try a less specific selector.',
          },
        };
      }

      function pickAttrs(el) {
        const out = {};
        for (const key of ATTR_WHITELIST) {
          const v = el.getAttribute(key);
          if (v != null && v !== '') out[key] = v;
        }
        return out;
      }

      function isVisible(el) {
        const rect = el.getBoundingClientRect();
        if (rect.width === 0 && rect.height === 0) return false;
        try {
          const style = getComputedStyle(el);
          if (style.display === 'none' || style.visibility === 'hidden') return false;
          if (parseFloat(style.opacity || '1') === 0) return false;
        } catch (_) {}
        return true;
      }

      // Ref allocation: reuse \`window.__opencli_ref_identity\` (the same map
      // snapshot populates) as the source of truth. For matched elements that
      // don't already carry a \`data-opencli-ref\`, assign the next free numeric
      // ref and write the fingerprint so the target resolver can verify it on
      // downstream click/type/get calls.
      const identity = (window.__opencli_ref_identity = window.__opencli_ref_identity || {});
      let maxRef = 0;
      for (const k in identity) {
        const n = parseInt(k, 10);
        if (!isNaN(n) && n > maxRef) maxRef = n;
      }
      // Also walk any \`data-opencli-ref\` already in the DOM in case the identity
      // map was cleared but annotations remain (e.g. soft navigation without a
      // fresh snapshot). Guarantees allocated refs don't collide.
      try {
        const tagged = document.querySelectorAll('[data-opencli-ref]');
        for (let t = 0; t < tagged.length; t++) {
          const v = tagged[t].getAttribute('data-opencli-ref');
          const n = v != null && /^\\d+$/.test(v) ? parseInt(v, 10) : NaN;
          if (!isNaN(n) && n > maxRef) maxRef = n;
        }
      } catch (_) {}

      function fingerprintOf(el) {
        return {
          tag: el.tagName.toLowerCase(),
          role: el.getAttribute('role') || '',
          text: (el.textContent || '').trim().slice(0, 30),
          ariaLabel: el.getAttribute('aria-label') || '',
          id: el.id || '',
          testId: el.getAttribute('data-testid') || el.getAttribute('data-test') || '',
        };
      }

      const take = Math.min(matches.length, LIMIT);
      const entries = [];
      for (let i = 0; i < take; i++) {
        const el = matches[i];
        const refAttr = el.getAttribute('data-opencli-ref');
        let refNum = refAttr != null && /^\\d+$/.test(refAttr) ? parseInt(refAttr, 10) : null;
        if (refNum === null) {
          refNum = ++maxRef;
          try { el.setAttribute('data-opencli-ref', '' + refNum); } catch (_) {}
          identity['' + refNum] = fingerprintOf(el);
        } else if (!identity['' + refNum]) {
          // Ref annotation survived but identity map was cleared — repopulate so the
          // target resolver's fingerprint check passes on downstream calls.
          identity['' + refNum] = fingerprintOf(el);
        }
        const text = (el.textContent || '').trim();
        const entry = {
          nth: i,
          ref: refNum,
          tag: el.tagName.toLowerCase(),
          role: el.getAttribute('role') || '',
          text: text.length > TEXT_MAX ? text.slice(0, TEXT_MAX) : text,
          attrs: pickAttrs(el),
          visible: isVisible(el),
        };
        const compound = compoundInfoOf(el);
        if (compound) entry.compound = compound;
        entries.push(entry);
      }

      return {
        matches_n: matches.length,
        entries,
      };
    })()
  `;
}

/**
 * Type guard distinguishing the error envelope from a successful result.
 *
 * @param result Value returned from evaluating the script built by `buildFindJs`.
 * @returns `true` when `result` is a non-null object that has an `error` key.
 */
export function isFindError(result: unknown): result is FindError {
  return !!result && typeof result === 'object' && 'error' in result;
}