search.js
/**
 * Google Web Search via browser DOM extraction.
 * Uses browser mode to navigate google.com and extract results from the DOM.
 *
 * Extraction strategy (2026-03): Google no longer uses `.g` class containers.
 * Instead, we find all `a` tags containing `h3` within `#rso`, then walk up
 * (at most six levels) to the nearest ancestor carrying a `data-hveid`
 * attribute and search that container for snippet text. This approach is
 * resilient to class name changes.
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CliError } from '@jackwener/opencli/errors';

/** Limit used when `limit` is missing or cannot be parsed as a number. */
const DEFAULT_LIMIT = 10;

/** Largest accepted `limit`; larger values are clamped down to this. */
const MAX_LIMIT = 100;

/**
 * Normalise the raw `limit` argument to an integer in [1, MAX_LIMIT].
 *
 * `Math.min`/`Math.max` propagate NaN, so an unparsable value would otherwise
 * end up in the query string as `num=NaN`; such values fall back to
 * DEFAULT_LIMIT instead. Fractional values are truncated.
 *
 * @param {unknown} rawLimit - Value of the `limit` CLI argument.
 * @returns {number} Integer between 1 and MAX_LIMIT inclusive.
 */
function resolveLimit(rawLimit) {
  const parsed = Math.trunc(Number(rawLimit));
  if (Number.isNaN(parsed)) {
    return DEFAULT_LIMIT;
  }
  return Math.max(1, Math.min(parsed, MAX_LIMIT));
}

/**
 * Keep at most `limit` organic rows (`type === 'result'`), preserving order.
 * Featured-snippet and People Also Ask rows are always kept.
 *
 * The `num` query parameter is only a request to Google; this enforces the
 * cap locally in case the page contains more organic results than asked for.
 *
 * @param {Array<{type: string, title: string, url: string, snippet: string}>} results
 * @param {number} limit - Maximum number of organic rows to keep.
 * @returns {Array<{type: string, title: string, url: string, snippet: string}>}
 */
function capOrganicResults(results, limit) {
  let organicCount = 0;
  return results.filter((entry) => {
    if (entry.type !== 'result') {
      return true;
    }
    organicCount += 1;
    return organicCount <= limit;
  });
}

/**
 * Script evaluated inside the Google results page. It returns an array of
 * `{ type, title, url, snippet }` rows in this order: an optional featured
 * snippet (`type: 'snippet'`), organic results (`type: 'result'`), then
 * People Also Ask questions (`type: 'paa'`). Returns an empty array when the
 * `#rso` container is absent.
 *
 * Note: this is a template literal, so `\\/` reaches the page as `\/` and
 * `\u203A` reaches it as the literal breadcrumb separator character.
 */
const EXTRACT_RESULTS_SCRIPT = `
  (function() {
    var results = [];
    var seenUrls = {};
    var rso = document.querySelector('#rso');
    if (!rso) return results;

    // -- Featured snippet (scoped to #rso to avoid matching unrelated elements) --
    var featuredEl = rso.querySelector('.xpdopen .hgKElc')
      || rso.querySelector('.IZ6rdc');
    if (featuredEl) {
      var parentBlock = featuredEl.closest('[data-hveid]') || featuredEl.parentElement;
      var fLink = parentBlock ? parentBlock.querySelector('a[href]') : null;
      var fUrl = fLink ? fLink.href : '';
      // Remember the URL so the same page is not listed again as an organic result.
      if (fUrl) seenUrls[fUrl] = true;
      results.push({
        type: 'snippet',
        title: featuredEl.textContent.trim().slice(0, 200),
        url: fUrl,
        snippet: '',
      });
    }

    // -- Standard search results --
    // Strategy: find all links containing h3 within #rso
    var allLinks = rso.querySelectorAll('a');
    for (var i = 0; i < allLinks.length; i++) {
      var link = allLinks[i];
      var h3 = link.querySelector('h3');
      if (!h3) continue;

      var href = link.href || '';
      // Skip non-http, Google internal links, and duplicates
      if (!href.match(/^https?:\\/\\//)) continue;
      if (href.indexOf('google.com/search') !== -1) continue;
      if (seenUrls[href]) continue;
      seenUrls[href] = true;

      // Walk up to find result container for snippet extraction
      var container = link;
      for (var j = 0; j < 6; j++) {
        // Never climb onto #rso itself: it contains every result.
        if (container.parentElement && container.parentElement !== rso) {
          container = container.parentElement;
        }
        // Stop at a known result boundary
        if (container.getAttribute && container.getAttribute('data-hveid')) break;
      }

      // Find snippet: look for descriptive text, skip breadcrumbs and metadata
      var snippetText = '';
      var titleText = h3.textContent.trim();
      var candidates = container.querySelectorAll('span, div');
      for (var k = 0; k < candidates.length; k++) {
        var el = candidates[k];
        // Elements wrapping the title or a link are layout wrappers, not snippet text.
        if (el.querySelector('h3') || el.querySelector('a[href]')) continue;
        var text = el.textContent.trim();
        if (text.length < 40 || text.length > 500) continue;
        if (text === titleText) continue;
        // Skip URL breadcrumbs (e.g. "https://example.com › path..." or "Site Namehttps://...")
        if (text.indexOf('\u203A') !== -1) continue;
        if (new RegExp('https?://').test(text.slice(0, 60))) continue;
        snippetText = text;
        break;
      }

      results.push({
        type: 'result',
        title: h3.textContent.trim(),
        url: href,
        snippet: snippetText.slice(0, 300),
      });
    }

    // -- People Also Ask --
    var paaContainers = document.querySelectorAll('[data-sgrd="true"]');
    for (var i = 0; i < paaContainers.length; i++) {
      var questionEl = paaContainers[i].querySelector('span.CSkcDe');
      if (questionEl) {
        results.push({
          type: 'paa',
          title: questionEl.textContent.trim(),
          url: '',
          snippet: '',
        });
      }
    }

    return results;
  })()
`;

cli({
  site: 'google',
  name: 'search',
  description: 'Search Google',
  domain: 'google.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'keyword', positional: true, required: true, help: 'Search query' },
    { name: 'limit', type: 'int', default: DEFAULT_LIMIT, help: 'Number of results (1-100)' },
    { name: 'lang', default: 'en', help: 'Language short code (e.g. en, zh)' },
  ],
  columns: ['type', 'title', 'url', 'snippet'],
  /**
   * Open the Google results page for the query and scrape it.
   *
   * @param page - Browser page handle supplied by the CLI runtime; this
   *   function uses its `goto`, `wait` and `evaluate` methods.
   * @param args - Parsed CLI arguments: `keyword`, `limit`, `lang`.
   * @returns Rows of `{ type, title, url, snippet }`, with organic rows
   *   capped at the resolved limit.
   * @throws {CliError} `NOT_FOUND` when the page yields no rows.
   */
  func: async (page, args) => {
    const limit = resolveLimit(args.limit);
    const keyword = encodeURIComponent(args.keyword);
    const lang = encodeURIComponent(args.lang);
    const url = `https://www.google.com/search?q=${keyword}&hl=${lang}&num=${limit}`;

    await page.goto(url);
    // Fixed pause before scraping so the results can render.
    // NOTE(review): the unit is presumably seconds; confirm against the page API.
    await page.wait(2);

    const results = await page.evaluate(EXTRACT_RESULTS_SCRIPT);
    if (!Array.isArray(results) || results.length === 0) {
      throw new CliError('NOT_FOUND', 'No search results found', 'Try a different keyword or check for CAPTCHA');
    }
    return capOrganicResults(results, limit);
  },
});