// search.js
import { CommandExecutionError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import {
    buildProvenance,
    buildSearchUrl,
    cleanText,
    extractAsin,
    normalizeProductUrl,
    parsePriceText,
    parseRatingValue,
    parseReviewCount,
    assertUsableState,
    gotoAndReadState,
} from './shared.js';

/** Result count used when `limit` is absent or does not coerce to a non-zero number. */
const DEFAULT_LIMIT = 20;

/**
 * Script evaluated inside the search results page. It returns the current
 * location plus one plain object of raw, uncleaned strings per element
 * matching the search-result selector. All cleaning and parsing happens
 * later in `normalizeSearchCandidate`.
 */
const SEARCH_PAYLOAD_SCRIPT = `
    (() => ({
      href: window.location.href,
      cards: Array.from(document.querySelectorAll('[data-component-type="s-search-result"]'))
        .map((card) => ({
          asin: card.getAttribute('data-asin') || '',
          title: card.querySelector('h2')?.textContent || '',
          href: card.querySelector('a.a-link-normal[href*="/dp/"]')?.href || '',
          price_text: card.querySelector('.a-price .a-offscreen')?.textContent || '',
          rating_text: card.querySelector('[aria-label*="out of 5 stars"]')?.getAttribute('aria-label') || '',
          review_count_text: card.querySelector('a[href*="#customerReviews"]')?.textContent || '',
          sponsored: /sponsored/i.test(card.innerText || ''),
          badge_texts: Array.from(card.querySelectorAll('.a-badge-text')).map((node) => node.textContent || ''),
        })),
    }))()
  `;

/**
 * Convert one raw card scraped from the results page into an output row.
 *
 * @param {object} candidate - Raw card with the fields produced by the in-page
 *   script (`asin`, `title`, `href`, `price_text`, `rating_text`,
 *   `review_count_text`, `sponsored`, `badge_texts`).
 * @param {number} rank - Position to report for this row.
 * @param {string} sourceUrl - URL handed to `buildProvenance`.
 * @returns {object} Row containing rank, asin, title, product URL, the
 *   provenance fields, price, rating, review count, sponsorship flag and badges.
 */
function normalizeSearchCandidate(candidate, rank, sourceUrl) {
    const product_url = normalizeProductUrl(candidate.href);

    // Prefer the ASIN carried by the card; fall back to one found in the URL.
    const asinFromCard = extractAsin(candidate.asin ?? '');
    const asin = asinFromCard ?? extractAsin(product_url ?? '') ?? null;

    const { price_text, price_value, currency } = parsePriceText(candidate.price_text);
    const rating_text = cleanText(candidate.rating_text) || null;
    const review_count_text = cleanText(candidate.review_count_text) || null;
    const provenance = buildProvenance(sourceUrl);

    const rawBadges = candidate.badge_texts ?? [];
    const title = cleanText(candidate.title) || null;

    // Key order is kept as-is: fields listed after the provenance spread
    // take precedence over any same-named provenance key.
    return {
        rank,
        asin,
        title,
        product_url,
        ...provenance,
        price_text,
        price_value,
        currency,
        rating_text,
        rating_value: parseRatingValue(rating_text),
        review_count_text,
        review_count: parseReviewCount(review_count_text),
        is_sponsored: candidate.sponsored === true,
        badges: rawBadges
            .map((rawBadge) => cleanText(rawBadge))
            .filter((badge) => Boolean(badge)),
    };
}

/**
 * Navigate to the search page for `query`, check the page state, and scrape
 * the raw result cards.
 *
 * @param {object} page - Browser page handle supplied by the CLI runtime.
 * @param {string} query - Search terms.
 * @returns {Promise<{href: string, cards: object[]}>} Value returned by the in-page script.
 */
async function readSearchPayload(page, query) {
    const searchUrl = buildSearchUrl(query);
    // NOTE(review): 2500 is passed straight through to gotoAndReadState;
    // presumably a settle/wait time in milliseconds — confirm in shared.js.
    const pageState = await gotoAndReadState(page, searchUrl, 2500, 'search');
    assertUsableState(pageState, 'search');
    const payload = await page.evaluate(SEARCH_PAYLOAD_SCRIPT);
    return payload;
}

/**
 * Keep only cards that have both a non-empty ASIN and a non-empty title
 * (after cleaning), capped at `limit` entries.
 *
 * @param {object[] | null | undefined} rawCards - Cards from the page payload.
 * @param {number} limit - Maximum number of cards to keep.
 * @returns {object[]} The first `limit` usable cards, in page order.
 */
function selectUsableCards(rawCards, limit) {
    const usable = (rawCards ?? []).filter((card) => {
        const hasAsin = Boolean(cleanText(card.asin));
        return hasAsin && Boolean(cleanText(card.title));
    });
    return usable.slice(0, limit);
}

cli({
    site: 'amazon',
    name: 'search',
    description: 'Amazon search results for product discovery and coarse filtering',
    domain: 'amazon.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: [
        {
            name: 'query',
            required: true,
            positional: true,
            help: 'Search query, for example "desk shelf organizer"',
        },
        {
            name: 'limit',
            type: 'int',
            default: DEFAULT_LIMIT,
            help: 'Maximum number of results to return (default 20)',
        },
    ],
    columns: ['rank', 'asin', 'title', 'price_text', 'rating_value', 'review_count'],
    func: async (page, kwargs) => {
        const query = String(kwargs.query ?? '');
        // Anything that does not coerce to a non-zero number falls back to the
        // default; the result is then clamped to at least one row.
        const requestedLimit = Number(kwargs.limit) || DEFAULT_LIMIT;
        const limit = Math.max(1, requestedLimit);

        const payload = await readSearchPayload(page, query);
        // Prefer the URL the browser actually reported; otherwise use the one we built.
        const sourceUrl = cleanText(payload.href) || buildSearchUrl(query);

        const cards = selectUsableCards(payload.cards, limit);
        if (cards.length === 0) {
            throw new CommandExecutionError('amazon search did not expose any product cards', 'The search page may have changed or hit a robot check. Open the same query in Chrome, verify the page is visible, and retry.');
        }

        // Ranks are 1-based and count only the cards that survived filtering.
        return cards.map((card, position) => normalizeSearchCandidate(card, position + 1, sourceUrl));
    },
});

export const __test__ = {
    normalizeSearchCandidate,
};