/ src / browser / target-resolver.ts
target-resolver.ts
  1  /**
  2   * Unified target resolver for browser actions.
  3   *
  4   * Resolution pipeline:
  5   *
  6   * 1. Input classification: all-digit → numeric ref path, otherwise → CSS path.
  7   *    The CSS path passes the raw string to `querySelectorAll` and lets the
  8   *    browser parser decide what's valid. No frontend regex whitelist — the
  9   *    goal is that any selector accepted by `browser find --css` is accepted
 10   *    by the same selector on `get/click/type/select`.
 11   * 2. Ref path: cascading match levels (see below), using data-opencli-ref
 12   *    plus the fingerprint map populated by snapshot + find.
 13   * 3. CSS path: querySelectorAll + match-count policy (see ResolveOptions)
 14   * 4. Structured errors:
 15   *    - numeric: not_found / stale_ref
 16   *    - CSS:     invalid_selector / selector_not_found / selector_ambiguous
 17   *               / selector_nth_out_of_range
 18   *
 19   * All JS is generated as strings for page.evaluate() — runs in the browser.
 20   *
 21   * ── Cascading stale-ref (browser-use style) ──────────────────────────
 22   * Strict equality on the fingerprint rejected too many live pages — SPA
 23   * re-renders swap text / role while keeping id + testId. The resolver
 24   * now walks three tiers before giving up:
 25   *
 26   *   1. EXACT        — tag agrees and every non-empty stored field matches (text by prefix)
 27   *   2. STABLE       — tag + strong id agree, soft signals drifted (aria-label,
 28   *                     role, text) — agent gets a warning but the action
 29   *                     proceeds so dynamic pages don't stall
 30   *   3. REIDENTIFIED — original ref either missing from the DOM or fully
 31   *                     mismatched, but the fingerprint uniquely identifies
 32   *                     a single other live element via id / testId /
 33   *                     aria-label. Re-tag that element with the old ref and
 34   *                     surface match_level so the caller knows we swapped.
 35   *
 36   * Only when all three fail do we emit `stale_ref`. Every success envelope
 37   * carries `match_level` so downstream CLIs can surface the weakest tier
 38   * a caller actually traversed.
 39   */
 40  
 41  export interface ResolveOptions {
 42    /**
 43     * When CSS matches multiple elements, pick the element at this 0-based
 44     * index instead of raising `selector_ambiguous`. Raises
 45     * `selector_nth_out_of_range` if `nth >= matches.length`.
 46     */
 47    nth?: number;
 48    /**
 49     * When CSS matches multiple elements, pick the first match instead of
 50     * raising `selector_ambiguous`. Used by read commands (get text / value /
 51     * attributes) to deliver a best-effort answer + matches_n in the envelope.
 52     * Ignored when `nth` is also set (nth wins).
 53     */
 54    firstOnMulti?: boolean;
 55  }
 56  
 57  /** Tier the resolver traversed to land the target. Callers may surface this to agents. */
 58  export type TargetMatchLevel = 'exact' | 'stable' | 'reidentified';
 59  
 60  /**
 61   * Generate JS that resolves a target to a single DOM element.
 62   *
 63   * Returns a JS expression that evaluates to:
 64   *   { ok: true, matches_n, match_level }            — success (el stored in `__resolved`)
 65   *   { ok: false, code, message, hint, candidates, matches_n? }  — structured error
 66   *
 67   * `match_level` is always set on success:
 68   *   - CSS path → 'exact'
 69   *   - numeric ref path → whichever tier matched ('exact' / 'stable' / 'reidentified')
 70   *
 71   * The resolved element is stored in `window.__resolved` for downstream helpers.
 72   */
 73  export function resolveTargetJs(ref: string, opts: ResolveOptions = {}): string {
 74    const safeRef = JSON.stringify(ref);
 75    const nthJs = opts.nth !== undefined ? String(opts.nth | 0) : 'null';
 76    const firstOnMulti = opts.firstOnMulti === true ? 'true' : 'false';
 77    return `
 78      (() => {
 79        const ref = ${safeRef};
 80        const nth = ${nthJs};
 81        const firstOnMulti = ${firstOnMulti};
 82        const identity = window.__opencli_ref_identity || {};
 83  
 84        // ── Classify input ──
 85        // Numeric = snapshot ref. Everything else is handed to querySelectorAll
 86        // and whatever the browser parser accepts is a valid selector. No regex
 87        // shortlist up front: \`find --css\` and \`get/click/type/select\` must agree
 88        // on the same selector surface (see contract note at the top of this file).
 89        const isNumeric = /^\\d+$/.test(ref);
 90  
 91        if (isNumeric) {
 92          // ── Ref path (cascading match levels) ──
 93  
 94          // Shared helper: compute a fingerprint off a live element, same shape
 95          // snapshot + find populate into \`__opencli_ref_identity\`. Kept inline
 96          // (not imported) because this source string is compiled standalone.
 97          function fingerprintOf(node) {
 98            return {
 99              tag: node.tagName.toLowerCase(),
100              role: node.getAttribute('role') || '',
101              text: (node.textContent || '').trim().slice(0, 30),
102              ariaLabel: node.getAttribute('aria-label') || '',
103              id: node.id || '',
104              testId: node.getAttribute('data-testid') || node.getAttribute('data-test') || '',
105            };
106          }
107  
108          // Classify how strongly a live element matches a stored fingerprint.
109          // Returns one of 'exact' | 'stable' | 'mismatch'.
110          //
111          // 'exact'  — tag + every non-empty stored field agrees (±text prefix).
112          // 'stable' — tag agrees AND at least one strong id (id or testId) still
113          //            matches; soft signals (aria-label, role, text) may have
114          //            drifted. This covers SPA re-render / i18n label swaps.
115          // 'mismatch' otherwise.
116          function classifyMatch(fp, liveFp) {
117            if (fp.tag !== liveFp.tag) return 'mismatch';
118  
119            const idMatch = !fp.id || fp.id === liveFp.id;
120            const testIdMatch = !fp.testId || fp.testId === liveFp.testId;
121            const roleMatch = !fp.role || fp.role === liveFp.role;
122            const ariaMatch = !fp.ariaLabel || fp.ariaLabel === liveFp.ariaLabel;
123            const textMatch = !fp.text || (
124              !!liveFp.text && (liveFp.text.startsWith(fp.text) || fp.text.startsWith(liveFp.text))
125            );
126  
127            if (idMatch && testIdMatch && roleMatch && ariaMatch && textMatch) return 'exact';
128  
129            // Strong id decides: if id + testId still agree and we had at least one
130            // of them, accept as stable regardless of soft-signal drift.
131            const hadStrongId = !!fp.id || !!fp.testId;
132            if (hadStrongId && idMatch && testIdMatch) return 'stable';
133  
134            return 'mismatch';
135          }
136  
137          // Try to recover a stale ref by searching the page for a live element
138          // whose fingerprint still matches. Uniqueness is required — if two
139          // candidates match equally well, we refuse rather than silently pick
140          // the wrong one. Covers ref annotations lost to a re-mount.
141          function reidentify(fp) {
142            if (!fp) return null;
143            const candidates = [];
144            function tryAdd(el) {
145              if (el && el.nodeType === 1 && classifyMatch(fp, fingerprintOf(el)) !== 'mismatch') {
146                if (candidates.indexOf(el) === -1) candidates.push(el);
147              }
148            }
149            // Prefer strong-id lookups. If id / testId is present and yields a
150            // unique element, that's our hit.
151            try {
152              if (fp.id) {
153                const byId = document.getElementById(fp.id);
154                if (byId) tryAdd(byId);
155              }
156              if (fp.testId) {
157                const byTestIdA = document.querySelectorAll('[data-testid="' + fp.testId.replace(/"/g, '\\\\"') + '"]');
158                for (let i = 0; i < byTestIdA.length; i++) tryAdd(byTestIdA[i]);
159                const byTestIdB = document.querySelectorAll('[data-test="' + fp.testId.replace(/"/g, '\\\\"') + '"]');
160                for (let i = 0; i < byTestIdB.length; i++) tryAdd(byTestIdB[i]);
161              }
162              // aria-label is only a useful shortlist when nothing stronger is set
163              if (candidates.length === 0 && fp.ariaLabel) {
164                const byAria = document.querySelectorAll('[aria-label="' + fp.ariaLabel.replace(/"/g, '\\\\"') + '"]');
165                for (let i = 0; i < byAria.length; i++) tryAdd(byAria[i]);
166              }
167            } catch (_) { /* bad selectors from weird fp values — skip */ }
168            return candidates.length === 1 ? candidates[0] : null;
169          }
170  
171          const fp = identity[ref];
172          let el = document.querySelector('[data-opencli-ref="' + ref + '"]');
173          if (!el) el = document.querySelector('[data-ref="' + ref + '"]');
174  
175          // If the ref tag is gone from the DOM, last-chance reidentify.
176          if (!el) {
177            const recovered = reidentify(fp);
178            if (recovered) {
179              try {
180                recovered.setAttribute('data-opencli-ref', ref);
181                identity[ref] = fingerprintOf(recovered);
182              } catch (_) {}
183              window.__resolved = recovered;
184              return { ok: true, matches_n: 1, match_level: 'reidentified' };
185            }
186            return {
187              ok: false,
188              code: 'not_found',
189              message: 'ref=' + ref + ' not found in DOM',
190              hint: 'The element may have been removed. Re-run \`opencli browser state\` to get a fresh snapshot.',
191            };
192          }
193  
194          // No stored fingerprint (older page / unknown ref) — accept as exact.
195          if (!fp) {
196            window.__resolved = el;
197            return { ok: true, matches_n: 1, match_level: 'exact' };
198          }
199  
200          const liveFp = fingerprintOf(el);
201          const level = classifyMatch(fp, liveFp);
202  
203          if (level === 'exact' || level === 'stable') {
204            window.__resolved = el;
205            return { ok: true, matches_n: 1, match_level: level };
206          }
207  
208          // Tag / strong-id mismatch — try to find the real element elsewhere
209          // before giving up. Covers e.g. a modal re-mount that discarded the
210          // data-opencli-ref attribute on the surviving node.
211          const recovered = reidentify(fp);
212          if (recovered && recovered !== el) {
213            try {
214              el.removeAttribute('data-opencli-ref');
215              recovered.setAttribute('data-opencli-ref', ref);
216              identity[ref] = fingerprintOf(recovered);
217            } catch (_) {}
218            window.__resolved = recovered;
219            return { ok: true, matches_n: 1, match_level: 'reidentified' };
220          }
221  
222          return {
223            ok: false,
224            code: 'stale_ref',
225            message: 'ref=' + ref + ' was <' + fp.tag + '>' + (fp.text ? '"' + fp.text + '"' : '')
226              + ' but now points to <' + liveFp.tag + '>' + (liveFp.text ? '"' + liveFp.text.slice(0, 30) + '"' : ''),
227            hint: 'The page has changed since the last snapshot. Re-run \`opencli browser state\` to refresh.',
228          };
229        }
230  
231        // ── CSS selector path (any non-numeric input) ──
232        {
233          let matches;
234          try {
235            matches = document.querySelectorAll(ref);
236          } catch (e) {
237            return {
238              ok: false,
239              code: 'invalid_selector',
240              message: 'Invalid CSS selector: ' + ref + ' (' + ((e && e.message) || String(e)) + ')',
241              hint: 'Check the selector syntax. Use ref numbers from snapshot for reliable targeting.',
242            };
243          }
244  
245          if (matches.length === 0) {
246            return {
247              ok: false,
248              code: 'selector_not_found',
249              message: 'CSS selector "' + ref + '" matched 0 elements',
250              hint: 'The element may not exist or may be hidden. Re-run \`opencli browser state\` to check, or use \`opencli browser find --css\` to explore candidates.',
251              matches_n: 0,
252            };
253          }
254  
255          if (nth !== null) {
256            if (nth < 0 || nth >= matches.length) {
257              return {
258                ok: false,
259                code: 'selector_nth_out_of_range',
260                message: 'CSS selector "' + ref + '" matched ' + matches.length + ' elements, but --nth=' + nth + ' is out of range',
261                hint: 'Use --nth between 0 and ' + (matches.length - 1) + ', or omit --nth to target the first match (read ops) or require explicit disambiguation (write ops).',
262                matches_n: matches.length,
263              };
264            }
265            window.__resolved = matches[nth];
266            return { ok: true, matches_n: matches.length, match_level: 'exact' };
267          }
268  
269          if (matches.length > 1 && !firstOnMulti) {
270            const candidates = [];
271            const limit = Math.min(matches.length, 5);
272            for (let i = 0; i < limit; i++) {
273              const m = matches[i];
274              const tag = m.tagName.toLowerCase();
275              const text = (m.textContent || '').trim().slice(0, 40);
276              const id = m.id ? '#' + m.id : '';
277              candidates.push('<' + tag + id + '>' + (text ? ' "' + text + '"' : ''));
278            }
279            return {
280              ok: false,
281              code: 'selector_ambiguous',
282              message: 'CSS selector "' + ref + '" matched ' + matches.length + ' elements',
283              hint: 'Pass --nth <n> (0-based) to pick one, or use a more specific selector. Use \`opencli browser find --css\` to list all candidates.',
284              candidates: candidates,
285              matches_n: matches.length,
286            };
287          }
288  
289          // Single match, OR multi-match with firstOnMulti (read path)
290          window.__resolved = matches[0];
291          return { ok: true, matches_n: matches.length, match_level: 'exact' };
292        }
293      })()
294    `;
295  }
296  
/**
 * Generate JS that clicks the element left in `window.__resolved` by a prior
 * `resolveTargetJs` run.
 *
 * The generated expression scrolls the element to the viewport centre,
 * measures it, then calls `el.click()`. It evaluates to
 * `{ status: 'clicked', x, y, w, h }`, or — when `click()` throws —
 * `{ status: 'js_failed', x, y, w, h, error }`, where `x`/`y` is the rounded
 * centre of the bounding rect and `w`/`h` its rounded size. It throws when no
 * element has been resolved.
 */
export function clickResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      target.scrollIntoView({ behavior: 'instant', block: 'center' });
      const box = target.getBoundingClientRect();
      // Centre point and size, measured once before the click is dispatched.
      const geometry = {
        x: Math.round(box.left + box.width / 2),
        y: Math.round(box.top + box.height / 2),
        w: Math.round(box.width),
        h: Math.round(box.height),
      };
      try {
        target.click();
      } catch (err) {
        return { status: 'js_failed', ...geometry, error: err.message };
      }
      return { status: 'clicked', ...geometry };
    })()
  `;
}
319  
/**
 * Generate JS that replaces the content of the element in
 * `window.__resolved` with `text`.
 *
 * - contenteditable hosts: select everything, then `execCommand('delete')` +
 *   `execCommand('insertText')`, followed by an `input` event.
 * - `<input>` / `<textarea>`: write through the prototype's native `value`
 *   setter (bypassing any instance-level override a framework installed),
 *   then dispatch bubbling `input` and `change` events.
 * - any other element exposing a `value` property (e.g. a custom element):
 *   plain `el.value = text`, then the same two events.
 *
 * The generated expression evaluates to `'typed'`. It throws when nothing is
 * resolved, or when the element is neither contenteditable nor has a `value`
 * property.
 *
 * @param text Text to enter; embedded into the script via `JSON.stringify`.
 * @returns JavaScript source text for `page.evaluate()`.
 */
export function typeResolvedJs(text: string): string {
  const safeText = JSON.stringify(text);
  return `
    (() => {
      const el = window.__resolved;
      if (!el) throw new Error('No resolved element');
      const text = ${safeText};
      el.focus();
      if (el.isContentEditable) {
        const sel = window.getSelection();
        const range = document.createRange();
        range.selectNodeContents(el);
        sel.removeAllRanges();
        sel.addRange(range);
        document.execCommand('delete', false);
        document.execCommand('insertText', false, text);
        el.dispatchEvent(new Event('input', { bubbles: true }));
      } else {
        if (!('value' in el)) {
          throw new Error('Resolved element <' + String(el.tagName || '').toLowerCase() + '> is not a text field or contenteditable');
        }
        // Native value setters are brand-checked: invoking the
        // HTMLInputElement setter on anything that is not a real <input>
        // throws "Illegal invocation". Only borrow a prototype setter when
        // the element actually is of that type.
        const proto = el instanceof HTMLTextAreaElement
          ? HTMLTextAreaElement.prototype
          : el instanceof HTMLInputElement
            ? HTMLInputElement.prototype
            : null;
        const nativeSetter = proto
          ? Object.getOwnPropertyDescriptor(proto, 'value')?.set
          : undefined;
        if (nativeSetter) {
          nativeSetter.call(el, text);
        } else {
          el.value = text;
        }
        el.dispatchEvent(new Event('input', { bubbles: true }));
        el.dispatchEvent(new Event('change', { bubbles: true }));
      }
      return 'typed';
    })()
  `;
}
356  
/**
 * Generate JS that smooth-scrolls the element in `window.__resolved` into
 * view (vertically centred, nearest inline position).
 *
 * Expects a prior `resolveTargetJs` run to have stored the element; the
 * generated expression throws otherwise. It evaluates to
 * `{ scrolled: true, tag, text }`, where `tag` is the lower-cased tag name and
 * `text` the first 80 characters of the trimmed text content.
 */
export function scrollResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      target.scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' });
      const tag = target.tagName.toLowerCase();
      const text = (target.textContent || '').trim().slice(0, 80);
      return { scrolled: true, tag, text };
    })()
  `;
}
371  
/**
 * Generate JS that reads the text of the element in `window.__resolved`.
 *
 * The generated expression evaluates to the trimmed `textContent`, or `null`
 * when `textContent` is null/undefined. It throws when nothing is resolved.
 */
export function getTextResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      const raw = target.textContent;
      return raw == null ? null : raw.trim();
    })()
  `;
}
384  
/**
 * Generate JS that reads the `value` property of the element in
 * `window.__resolved`.
 *
 * The generated expression evaluates to `el.value`, or `null` when that
 * property is null/undefined. It throws when nothing is resolved.
 */
export function getValueResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      const current = target.value;
      if (current === undefined || current === null) return null;
      return current;
    })()
  `;
}
397  
/**
 * Generate JS that dumps every attribute of the element in
 * `window.__resolved`.
 *
 * The generated expression evaluates to a JSON string of a
 * `{ attributeName: attributeValue }` object. It throws when nothing is
 * resolved.
 */
export function getAttributesResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      const pairs = Array.from(target.attributes, (attr) => [attr.name, attr.value]);
      return JSON.stringify(Object.fromEntries(pairs));
    })()
  `;
}
410  
/**
 * Generate JS that picks an option on the `<select>` in `window.__resolved`.
 *
 * An option matches when its trimmed label or its `value` equals `option`;
 * the first match in document order wins. The value is written through the
 * native `HTMLSelectElement` setter when available, then bubbling `input` and
 * `change` events are dispatched.
 *
 * The generated expression evaluates to one of:
 *   { selected: <label of the chosen option> }
 *   { error: 'Not a <select>' }
 *   { error: 'Option not found', available: [<trimmed labels>] }
 * and throws when nothing is resolved.
 *
 * @param option Label or value to select; embedded via `JSON.stringify`.
 */
export function selectResolvedJs(option: string): string {
  const safeOption = JSON.stringify(option);
  return `
    (() => {
      const target = window.__resolved;
      if (!target) throw new Error('No resolved element');
      if (target.tagName !== 'SELECT') return { error: 'Not a <select>' };

      const wanted = ${safeOption};
      const choices = Array.from(target.options);
      let chosen = null;
      for (const choice of choices) {
        if (choice.text.trim() === wanted || choice.value === wanted) {
          chosen = choice;
          break;
        }
      }
      if (chosen === null) {
        return { error: 'Option not found', available: choices.map((choice) => choice.text.trim()) };
      }

      const descriptor = Object.getOwnPropertyDescriptor(HTMLSelectElement.prototype, 'value');
      const nativeSetter = descriptor ? descriptor.set : undefined;
      if (nativeSetter) {
        nativeSetter.call(target, chosen.value);
      } else {
        target.value = chosen.value;
      }
      for (const type of ['input', 'change']) {
        target.dispatchEvent(new Event(type, { bubbles: true }));
      }
      return { selected: chosen.text };
    })()
  `;
}
431  
/**
 * Generate JS that tells whether the element in `window.__resolved` looks
 * like an autocomplete / combobox field.
 *
 * The generated expression evaluates to `true` when the element has
 * `role="combobox"`, `aria-autocomplete` of `list` or `both`, or a non-empty
 * `list` attribute; otherwise `false`. Unlike the other helpers it does not
 * throw when nothing is resolved — it evaluates to `false`.
 */
export function isAutocompleteResolvedJs(): string {
  return `
    (() => {
      const target = window.__resolved;
      if (!target) return false;
      const [role, autocomplete, datalistId] = ['role', 'aria-autocomplete', 'list']
        .map((name) => target.getAttribute(name));
      if (role === 'combobox') return true;
      if (autocomplete === 'list' || autocomplete === 'both') return true;
      return Boolean(datalistId);
    })()
  `;
}
446  }