/ clis / amazon / search.js
search.js
 1  import { CommandExecutionError } from '@jackwener/opencli/errors';
 2  import { cli, Strategy } from '@jackwener/opencli/registry';
 3  import { buildProvenance, buildSearchUrl, cleanText, extractAsin, normalizeProductUrl, parsePriceText, parseRatingValue, parseReviewCount, assertUsableState, gotoAndReadState, } from './shared.js';
 4  function normalizeSearchCandidate(candidate, rank, sourceUrl) {
 5      const productUrl = normalizeProductUrl(candidate.href);
 6      const asin = extractAsin(candidate.asin ?? '') ?? extractAsin(productUrl ?? '') ?? null;
 7      const price = parsePriceText(candidate.price_text);
 8      const ratingText = cleanText(candidate.rating_text) || null;
 9      const reviewCountText = cleanText(candidate.review_count_text) || null;
10      const provenance = buildProvenance(sourceUrl);
11      return {
12          rank,
13          asin,
14          title: cleanText(candidate.title) || null,
15          product_url: productUrl,
16          ...provenance,
17          price_text: price.price_text,
18          price_value: price.price_value,
19          currency: price.currency,
20          rating_text: ratingText,
21          rating_value: parseRatingValue(ratingText),
22          review_count_text: reviewCountText,
23          review_count: parseReviewCount(reviewCountText),
24          is_sponsored: candidate.sponsored === true,
25          badges: (candidate.badge_texts ?? []).map((value) => cleanText(value)).filter(Boolean),
26      };
27  }
/**
 * Load the search page for `query` and scrape the raw result cards from it.
 *
 * The page is opened through `gotoAndReadState` and the returned state is checked
 * with `assertUsableState` before any scraping happens, so a rejected state throws
 * before `page.evaluate` is called.
 *
 * @param {object} page - Browser page handle; must provide `evaluate(script)`.
 * @param {string} query - Search phrase handed to `buildSearchUrl`.
 * @returns {Promise<{href: string, cards: object[]}>} The page's final URL plus one raw
 *   entry per `[data-component-type="s-search-result"]` element.
 */
async function readSearchPayload(page, query) {
    const searchUrl = buildSearchUrl(query);
    // NOTE(review): 2500 is presumably a settle delay in milliseconds — confirm in shared.js.
    const pageState = await gotoAndReadState(page, searchUrl, 2500, 'search');
    assertUsableState(pageState, 'search');

    // Runs inside the page: every field falls back to '' so the caller gets uniform shapes.
    const extractCardsScript = `
    (() => ({
      href: window.location.href,
      cards: Array.from(document.querySelectorAll('[data-component-type="s-search-result"]'))
        .map((card) => ({
          asin: card.getAttribute('data-asin') || '',
          title: card.querySelector('h2')?.textContent || '',
          href: card.querySelector('a.a-link-normal[href*="/dp/"]')?.href || '',
          price_text: card.querySelector('.a-price .a-offscreen')?.textContent || '',
          rating_text: card.querySelector('[aria-label*="out of 5 stars"]')?.getAttribute('aria-label') || '',
          review_count_text: card.querySelector('a[href*="#customerReviews"]')?.textContent || '',
          sponsored: /sponsored/i.test(card.innerText || ''),
          badge_texts: Array.from(card.querySelectorAll('.a-badge-text')).map((node) => node.textContent || ''),
        })),
    }))()
  `;
    const scraped = await page.evaluate(extractCardsScript);
    return scraped;
}
// Registers the `amazon search` command with the CLI registry.
cli({
    site: 'amazon',
    name: 'search',
    description: 'Amazon search results for product discovery and coarse filtering',
    domain: 'amazon.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: [
        {
            name: 'query',
            required: true,
            positional: true,
            help: 'Search query, for example "desk shelf organizer"',
        },
        {
            name: 'limit',
            type: 'int',
            default: 20,
            help: 'Maximum number of results to return (default 20)',
        },
    ],
    columns: ['rank', 'asin', 'title', 'price_text', 'rating_value', 'review_count'],
    /**
     * Run the search and return normalized result records.
     *
     * @param {object} page - Browser page handle forwarded to `readSearchPayload`.
     * @param {object} kwargs - Parsed arguments; `query` and `limit` are read.
     * @returns {Promise<object[]>} Normalized records, ranked from 1 in page order.
     * @throws {CommandExecutionError} When no card has both an ASIN and a title.
     */
    func: async (page, kwargs) => {
        const searchTerm = String(kwargs.query ?? '');
        // A missing, zero or non-numeric limit falls back to 20; anything below 1 is raised to 1.
        const requested = Number(kwargs.limit) || 20;
        const maxResults = Math.max(1, requested);

        const scraped = await readSearchPayload(page, searchTerm);
        // Prefer the URL the browser actually ended up on over the one we built.
        const pageUrl = cleanText(scraped.href) || buildSearchUrl(searchTerm);

        // Cards without an ASIN or a title are discarded before the limit is applied.
        const hasAsinAndTitle = (card) => cleanText(card.asin) && cleanText(card.title);
        const usableCards = (scraped.cards ?? []).filter(hasAsinAndTitle).slice(0, maxResults);

        if (usableCards.length > 0) {
            return usableCards.map((card, position) => normalizeSearchCandidate(card, position + 1, pageUrl));
        }
        throw new CommandExecutionError('amazon search did not expose any product cards', 'The search page may have changed or hit a robot check. Open the same query in Chrome, verify the page is visible, and retry.');
    },
});
// Exposes internal helpers under a single named export.
export const __test__ = { normalizeSearchCandidate: normalizeSearchCandidate };