target-resolver.ts
/**
 * Unified target resolver for browser actions.
 *
 * Resolution pipeline:
 *
 * 1. Input classification: all-digit → numeric ref path, otherwise → CSS path.
 *    The CSS path passes the raw string to `querySelectorAll` and lets the
 *    browser parser decide what's valid. No frontend regex whitelist — the
 *    goal is that any selector accepted by `browser find --css` is accepted
 *    by the same selector on `get/click/type/select`.
 * 2. Ref path: cascading match levels (see below), using data-opencli-ref
 *    plus the fingerprint map populated by snapshot + find.
 * 3. CSS path: querySelectorAll + match-count policy (see ResolveOptions)
 * 4. Structured errors:
 *    - numeric: not_found / stale_ref
 *    - CSS: invalid_selector / selector_not_found / selector_ambiguous
 *      / selector_nth_out_of_range
 *
 * All JS is generated as strings for page.evaluate() — runs in the browser.
 *
 * ── Cascading stale-ref (browser-use style) ──────────────────────────
 * Strict equality on the fingerprint rejected too many live pages — SPA
 * re-renders swap text / role while keeping id + testId. The resolver
 * now walks three tiers before giving up:
 *
 *   1. EXACT        — tag agrees and every non-empty stored field (id,
 *                     testId, role, aria-label, text prefix) still agrees
 *   2. STABLE       — tag + strong id agree, soft signals drifted (aria-label,
 *                     role, text) — agent gets a warning but the action
 *                     proceeds so dynamic pages don't stall
 *   3. REIDENTIFIED — original ref either missing from the DOM or fully
 *                     mismatched, but the fingerprint uniquely identifies
 *                     a single other live element via id / testId /
 *                     aria-label. Re-tag that element with the old ref and
 *                     surface match_level so the caller knows we swapped.
 *
 * Only when all three fail do we emit `stale_ref` (or `not_found` when the
 * ref attribute is gone and nothing could be reidentified). Every success
 * envelope carries `match_level` so downstream CLIs can surface the weakest
 * tier a caller actually traversed.
 */

export interface ResolveOptions {
  /**
   * When CSS matches multiple elements, pick the element at this 0-based
   * index instead of raising `selector_ambiguous`. Raises
   * `selector_nth_out_of_range` if the index is negative, not a finite
   * number, or `>= matches.length`. Fractional values are truncated toward
   * zero. Only consulted on the CSS path.
   */
  nth?: number;
  /**
   * When CSS matches multiple elements, pick the first match instead of
   * raising `selector_ambiguous`. Used by read commands (get text / value /
   * attributes) to deliver a best-effort answer + matches_n in the envelope.
   * Ignored when `nth` is also set (nth wins).
   */
  firstOnMulti?: boolean;
}

/** Tier the resolver traversed to land the target. Callers may surface this to agents. */
export type TargetMatchLevel = 'exact' | 'stable' | 'reidentified';

/**
 * Generate JS that resolves a target to a single DOM element.
 *
 * Returns a JS expression that evaluates to:
 *   { ok: true, matches_n, match_level }                       — success (el stored in `__resolved`)
 *   { ok: false, code, message, hint, candidates?, matches_n? } — structured error
 *
 * `match_level` is always set on success:
 *   - CSS path         → 'exact'
 *   - numeric ref path → whichever tier matched ('exact' / 'stable' / 'reidentified')
 *
 * The resolved element is stored in `window.__resolved` for downstream helpers.
 *
 * @param ref  Either an all-digit snapshot ref or a CSS selector. Embedded via
 *             `JSON.stringify`, so arbitrary characters are safe.
 * @param opts Match-count policy for the CSS path (see {@link ResolveOptions}).
 * @returns Source text of an IIFE expression to be evaluated in the page.
 */
export function resolveTargetJs(ref: string, opts: ResolveOptions = {}): string {
  const safeRef = JSON.stringify(ref);
  // `nth | 0` would wrap NaN / ±Infinity / anything beyond int32 onto a
  // valid-looking index (NaN → 0, 2**32 → 0). Math.trunc keeps the same
  // truncation for ordinary values, while the odd ones stay NaN / Infinity /
  // huge and are rejected by the range check inside the page.
  const nthJs = opts.nth !== undefined ? String(Math.trunc(Number(opts.nth))) : 'null';
  const firstOnMulti = opts.firstOnMulti === true ? 'true' : 'false';
  return `
    (() => {
      const ref = ${safeRef};
      const nth = ${nthJs};
      const firstOnMulti = ${firstOnMulti};
      const identity = window.__opencli_ref_identity || {};

      // ── Classify input ──
      // Numeric = snapshot ref. Everything else is handed to querySelectorAll
      // and whatever the browser parser accepts is a valid selector. No regex
      // shortlist up front: \`find --css\` and \`get/click/type/select\` must agree
      // on the same selector surface (see contract note at the top of this file).
      const isNumeric = /^\\d+$/.test(ref);

      if (isNumeric) {
        // ── Ref path (cascading match levels) ──

        // Shared helper: compute a fingerprint off a live element, same shape
        // snapshot + find populate into \`__opencli_ref_identity\`. Kept inline
        // (not imported) because this source string is compiled standalone.
        function fingerprintOf(node) {
          return {
            tag: node.tagName.toLowerCase(),
            role: node.getAttribute('role') || '',
            text: (node.textContent || '').trim().slice(0, 30),
            ariaLabel: node.getAttribute('aria-label') || '',
            id: node.id || '',
            testId: node.getAttribute('data-testid') || node.getAttribute('data-test') || '',
          };
        }

        // Classify how strongly a live element matches a stored fingerprint.
        // Returns one of 'exact' | 'stable' | 'mismatch'.
        //
        //   'exact'  — tag + every non-empty stored field agrees (±text prefix).
        //   'stable' — tag agrees AND at least one strong id (id or testId) still
        //              matches; soft signals (aria-label, role, text) may have
        //              drifted. This covers SPA re-render / i18n label swaps.
        //   'mismatch' otherwise.
        function classifyMatch(fp, liveFp) {
          if (fp.tag !== liveFp.tag) return 'mismatch';

          // An empty stored field is "no constraint", not "must be empty".
          const idMatch = !fp.id || fp.id === liveFp.id;
          const testIdMatch = !fp.testId || fp.testId === liveFp.testId;
          const roleMatch = !fp.role || fp.role === liveFp.role;
          const ariaMatch = !fp.ariaLabel || fp.ariaLabel === liveFp.ariaLabel;
          const textMatch = !fp.text || (
            !!liveFp.text && (liveFp.text.startsWith(fp.text) || fp.text.startsWith(liveFp.text))
          );

          if (idMatch && testIdMatch && roleMatch && ariaMatch && textMatch) return 'exact';

          // Strong id decides: if id + testId still agree and we had at least one
          // of them, accept as stable regardless of soft-signal drift.
          const hadStrongId = !!fp.id || !!fp.testId;
          if (hadStrongId && idMatch && testIdMatch) return 'stable';

          return 'mismatch';
        }

        // Try to recover a stale ref by searching the page for a live element
        // whose fingerprint still matches. Uniqueness is required — if two
        // candidates match equally well, we refuse rather than silently pick
        // the wrong one. Covers ref annotations lost to a re-mount.
        function reidentify(fp) {
          if (!fp) return null;
          const candidates = [];
          function tryAdd(el) {
            if (el && el.nodeType === 1 && classifyMatch(fp, fingerprintOf(el)) !== 'mismatch') {
              if (candidates.indexOf(el) === -1) candidates.push(el);
            }
          }
          // Collect every element whose attribute equals the stored value.
          // Both the quote and the backslash are escaped so the value cannot
          // terminate or corrupt the quoted CSS string; a selector that is
          // still unparsable only skips this one lookup.
          function addAllByAttr(name, value) {
            let found;
            try {
              found = document.querySelectorAll(
                '[' + name + '="' + String(value).replace(/["\\\\]/g, '\\\\$&') + '"]'
              );
            } catch (_) {
              return;
            }
            for (let i = 0; i < found.length; i++) tryAdd(found[i]);
          }
          // Prefer strong-id lookups. If id / testId is present and yields a
          // unique element, that's our hit.
          try {
            if (fp.id) {
              const byId = document.getElementById(fp.id);
              if (byId) tryAdd(byId);
            }
            if (fp.testId) {
              addAllByAttr('data-testid', fp.testId);
              addAllByAttr('data-test', fp.testId);
            }
            // aria-label is only a useful shortlist when nothing stronger is set
            if (candidates.length === 0 && fp.ariaLabel) {
              addAllByAttr('aria-label', fp.ariaLabel);
            }
          } catch (_) { /* unexpected node shapes while fingerprinting — skip */ }
          return candidates.length === 1 ? candidates[0] : null;
        }

        const fp = identity[ref];
        let el = document.querySelector('[data-opencli-ref="' + ref + '"]');
        if (!el) el = document.querySelector('[data-ref="' + ref + '"]');

        // If the ref tag is gone from the DOM, last-chance reidentify.
        if (!el) {
          const recovered = reidentify(fp);
          if (recovered) {
            try {
              recovered.setAttribute('data-opencli-ref', ref);
              identity[ref] = fingerprintOf(recovered);
            } catch (_) {}
            window.__resolved = recovered;
            return { ok: true, matches_n: 1, match_level: 'reidentified' };
          }
          return {
            ok: false,
            code: 'not_found',
            message: 'ref=' + ref + ' not found in DOM',
            hint: 'The element may have been removed. Re-run \`opencli browser state\` to get a fresh snapshot.',
          };
        }

        // No stored fingerprint (older page / unknown ref) — accept as exact.
        if (!fp) {
          window.__resolved = el;
          return { ok: true, matches_n: 1, match_level: 'exact' };
        }

        const liveFp = fingerprintOf(el);
        const level = classifyMatch(fp, liveFp);

        if (level === 'exact' || level === 'stable') {
          window.__resolved = el;
          return { ok: true, matches_n: 1, match_level: level };
        }

        // Tag / strong-id mismatch — try to find the real element elsewhere
        // before giving up. Covers e.g. a modal re-mount that discarded the
        // data-opencli-ref attribute on the surviving node.
        const recovered = reidentify(fp);
        if (recovered && recovered !== el) {
          try {
            // Move the ref tag so later lookups land on the recovered node.
            el.removeAttribute('data-opencli-ref');
            recovered.setAttribute('data-opencli-ref', ref);
            identity[ref] = fingerprintOf(recovered);
          } catch (_) {}
          window.__resolved = recovered;
          return { ok: true, matches_n: 1, match_level: 'reidentified' };
        }

        return {
          ok: false,
          code: 'stale_ref',
          message: 'ref=' + ref + ' was <' + fp.tag + '>' + (fp.text ? '"' + fp.text + '"' : '')
            + ' but now points to <' + liveFp.tag + '>' + (liveFp.text ? '"' + liveFp.text.slice(0, 30) + '"' : ''),
          hint: 'The page has changed since the last snapshot. Re-run \`opencli browser state\` to refresh.',
        };
      }

      // ── CSS selector path (any non-numeric input) ──
      {
        let matches;
        try {
          matches = document.querySelectorAll(ref);
        } catch (e) {
          return {
            ok: false,
            code: 'invalid_selector',
            message: 'Invalid CSS selector: ' + ref + ' (' + ((e && e.message) || String(e)) + ')',
            hint: 'Check the selector syntax. Use ref numbers from snapshot for reliable targeting.',
          };
        }

        if (matches.length === 0) {
          return {
            ok: false,
            code: 'selector_not_found',
            message: 'CSS selector "' + ref + '" matched 0 elements',
            hint: 'The element may not exist or may be hidden. Re-run \`opencli browser state\` to check, or use \`opencli browser find --css\` to explore candidates.',
            matches_n: 0,
          };
        }

        if (nth !== null) {
          // Written as a negated in-range test so NaN (which fails every
          // comparison) is rejected instead of indexing matches[NaN].
          if (!(nth >= 0 && nth < matches.length)) {
            return {
              ok: false,
              code: 'selector_nth_out_of_range',
              message: 'CSS selector "' + ref + '" matched ' + matches.length + ' elements, but --nth=' + nth + ' is out of range',
              hint: 'Use --nth between 0 and ' + (matches.length - 1) + ', or omit --nth to target the first match (read ops) or require explicit disambiguation (write ops).',
              matches_n: matches.length,
            };
          }
          window.__resolved = matches[nth];
          return { ok: true, matches_n: matches.length, match_level: 'exact' };
        }

        if (matches.length > 1 && !firstOnMulti) {
          // Describe at most five matches so the caller can disambiguate.
          const candidates = [];
          const limit = Math.min(matches.length, 5);
          for (let i = 0; i < limit; i++) {
            const m = matches[i];
            const tag = m.tagName.toLowerCase();
            const text = (m.textContent || '').trim().slice(0, 40);
            const id = m.id ? '#' + m.id : '';
            candidates.push('<' + tag + id + '>' + (text ? ' "' + text + '"' : ''));
          }
          return {
            ok: false,
            code: 'selector_ambiguous',
            message: 'CSS selector "' + ref + '" matched ' + matches.length + ' elements',
            hint: 'Pass --nth <n> (0-based) to pick one, or use a more specific selector. Use \`opencli browser find --css\` to list all candidates.',
            candidates: candidates,
            matches_n: matches.length,
          };
        }

        // Single match, OR multi-match with firstOnMulti (read path)
        window.__resolved = matches[0];
        return { ok: true, matches_n: matches.length, match_level: 'exact' };
      }
    })()
  `;
}

/**
 * Generate JS for click that uses the unified resolver.
 * Assumes resolveTargetJs has been called and __resolved is set.
 *
 * The generated expression scrolls the element to the viewport centre, calls
 * `el.click()` and evaluates to `{ status, x, y, w, h }` where x/y are the
 * rounded centre of its bounding rect. `status` is 'clicked', or 'js_failed'
 * (with an `error` message) when `click()` throws. Throws if nothing is resolved.
 */
export function clickResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      el.scrollIntoView({ behavior: 'instant', block: 'center' });
      const rect = el.getBoundingClientRect();
      const x = Math.round(rect.left + rect.width / 2);
      const y = Math.round(rect.top + rect.height / 2);
      const w = Math.round(rect.width);
      const h = Math.round(rect.height);
      try {
        el.click();
        return { status: 'clicked', x, y, w, h };
      } catch (e) {
        // A thrown value is not guaranteed to be an Error instance.
        return { status: 'js_failed', x, y, w, h, error: (e && e.message) || String(e) };
      }
    })()
  `;
}

/**
 * Generate JS for type that uses the unified resolver.
 * Assumes resolveTargetJs has been called and __resolved is set.
 *
 * contenteditable hosts have their content selected, deleted and replaced via
 * `execCommand`, followed by an `input` event. Everything else has its `value`
 * replaced, followed by `input` + `change` events. The generated expression
 * evaluates to the string 'typed' and throws if nothing is resolved.
 *
 * @param text Replacement text; embedded via `JSON.stringify`.
 */
export function typeResolvedJs(text: string): string {
  const safeText = JSON.stringify(text);
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      const text = ${safeText};
      el.focus();
      if (el.isContentEditable) {
        const sel = window.getSelection();
        const range = document.createRange();
        range.selectNodeContents(el);
        sel.removeAllRanges();
        sel.addRange(range);
        document.execCommand('delete', false);
        document.execCommand('insertText', false, text);
        el.dispatchEvent(new Event('input', { bubbles: true }));
      } else {
        // Go through the prototype's own value setter so an instance-level
        // override of 'value' on the element does not intercept the write.
        const proto = el instanceof HTMLTextAreaElement
          ? HTMLTextAreaElement.prototype
          : HTMLInputElement.prototype;
        const nativeSetter = Object.getOwnPropertyDescriptor(proto, 'value')?.set;
        let assigned = false;
        if (nativeSetter) {
          try {
            nativeSetter.call(el, text);
            assigned = true;
          } catch (_) {
            // The native setter rejects receivers that are not a real
            // <input>/<textarea> (custom elements, other .value hosts).
          }
        }
        if (!assigned) el.value = text;
        el.dispatchEvent(new Event('input', { bubbles: true }));
        el.dispatchEvent(new Event('change', { bubbles: true }));
      }
      return 'typed';
    })()
  `;
}

/**
 * Generate JS for scrollTo that uses the unified resolver.
 * Assumes resolveTargetJs has been called and __resolved is set.
 *
 * The generated expression requests a smooth scroll of the element to the
 * viewport centre and evaluates to `{ scrolled: true, tag, text }`, where
 * `text` is the first 80 characters of the trimmed textContent.
 */
export function scrollResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      el.scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
      return { scrolled: true, tag: el.tagName.toLowerCase(), text: (el.textContent || '').trim().slice(0, 80) };
    })()
  `;
}

/**
 * Generate JS to get text content of resolved element.
 * Evaluates to the trimmed textContent, or null when textContent is null/undefined.
 */
export function getTextResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      return el.textContent?.trim() ?? null;
    })()
  `;
}

/**
 * Generate JS to get value of resolved input/textarea element.
 * Evaluates to `el.value`, or null when the element has no value.
 */
export function getValueResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      return el.value ?? null;
    })()
  `;
}

/**
 * Generate JS to get all attributes of resolved element.
 * Evaluates to a JSON *string* encoding a `{ name: value }` object.
 */
export function getAttributesResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      return JSON.stringify(Object.fromEntries([...el.attributes].map(a => [a.name, a.value])));
    })()
  `;
}

/**
 * Generate JS to select an option on a resolved <select> element.
 *
 * The first option whose trimmed text or whose value equals `option` is
 * chosen. The generated expression evaluates to `{ selected }` on success,
 * `{ error: 'Not a <select>' }` for other elements, or
 * `{ error: 'Option not found', available }` listing the option texts.
 *
 * @param option Option text or value to select; embedded via `JSON.stringify`.
 */
export function selectResolvedJs(option: string): string {
  const safeOption = JSON.stringify(option);
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      if (el.tagName !== 'SELECT') return { error: 'Not a <select>' };
      const match = Array.from(el.options).find(o => o.text.trim() === ${safeOption} || o.value === ${safeOption});
      if (!match) return { error: 'Option not found', available: Array.from(el.options).map(o => o.text.trim()) };
      // Use the prototype's own setter when available, then notify listeners.
      const setter = Object.getOwnPropertyDescriptor(HTMLSelectElement.prototype, 'value')?.set;
      if (setter) setter.call(el, match.value); else el.value = match.value;
      el.dispatchEvent(new Event('input', { bubbles: true }));
      el.dispatchEvent(new Event('change', { bubbles: true }));
      return { selected: match.text };
    })()
  `;
}

/**
 * Generate JS to check if resolved element is an autocomplete/combobox field.
 *
 * Evaluates to true when the element has role="combobox", aria-autocomplete
 * of 'list' or 'both', or a non-empty `list` attribute. Evaluates to false
 * (rather than throwing) when nothing is resolved.
 */
export function isAutocompleteResolvedJs(): string {
  return `
    (() => {
      const el = window.__resolved;
      if (!el) return false;
      const role = el.getAttribute('role');
      const ac = el.getAttribute('aria-autocomplete');
      const list = el.getAttribute('list');
      return role === 'combobox' || ac === 'list' || ac === 'both' || !!list;
    })()
  `;
}