// clis/amazon/discussion.js
  1  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
  2  import { cli, Strategy } from '@jackwener/opencli/registry';
  3  import { buildProductUrl, buildDiscussionUrl, buildProvenance, cleanText, extractAsin, normalizeProductUrl, parseRatingValue, parseReviewCount, trimRatingPrefix, uniqueNonEmpty, assertUsableState, gotoAndReadState, } from './shared.js';
  4  function normalizeDiscussionPayload(payload) {
  5      const sourceUrl = cleanText(payload.href) || buildDiscussionUrl(payload.href ?? '');
  6      const asin = extractAsin(payload.href ?? '') ?? null;
  7      const averageRatingText = cleanText(payload.average_rating_text) || null;
  8      const totalReviewCountText = cleanText(payload.total_review_count_text) || null;
  9      const provenance = buildProvenance(sourceUrl);
 10      return {
 11          asin,
 12          product_url: asin ? normalizeProductUrl(asin) : null,
 13          discussion_url: sourceUrl,
 14          ...provenance,
 15          average_rating_text: averageRatingText,
 16          average_rating_value: parseRatingValue(averageRatingText),
 17          total_review_count_text: totalReviewCountText,
 18          total_review_count: parseReviewCount(totalReviewCountText),
 19          qa_urls: uniqueNonEmpty(payload.qa_links ?? []),
 20          review_samples: (payload.review_samples ?? []).map((sample) => ({
 21              title: trimRatingPrefix(sample.title) || null,
 22              rating_text: cleanText(sample.rating_text) || null,
 23              rating_value: parseRatingValue(sample.rating_text),
 24              author: cleanText(sample.author) || null,
 25              date_text: cleanText(sample.date_text) || null,
 26              body: cleanText(sample.body) || null,
 27              verified_purchase: sample.verified === true,
 28          })),
 29      };
 30  }
 31  function hasDiscussionSummary(payload) {
 32      return Boolean(cleanText(payload.average_rating_text) || cleanText(payload.total_review_count_text));
 33  }
 34  function isSignInState(state) {
 35      const href = cleanText(state.href).toLowerCase();
 36      const title = cleanText(state.title).toLowerCase();
 37      return href.includes('/ap/signin')
 38          || title.includes('amazon sign-in');
 39  }
 40  async function readCurrentDiscussionPayload(page, limit) {
 41      return await page.evaluate(`
 42      (() => ({
 43        href: window.location.href,
 44        title: document.title || '',
 45        average_rating_text: document.querySelector('[data-hook="rating-out-of-text"]')?.textContent || '',
 46        total_review_count_text: document.querySelector('[data-hook="total-review-count"]')?.textContent || '',
 47        qa_links: Array.from(document.querySelectorAll('a[href*="ask/questions"]')).map((anchor) => anchor.href || ''),
 48        review_samples: Array.from(document.querySelectorAll('[data-hook="review"]')).slice(0, ${limit}).map((card) => ({
 49          title: card.querySelector('[data-hook="review-title"]')?.textContent || '',
 50          rating_text:
 51            card.querySelector('[data-hook="review-star-rating"]')?.textContent
 52            || card.querySelector('[data-hook="cmps-review-star-rating"]')?.textContent
 53            || '',
 54          author: card.querySelector('.a-profile-name')?.textContent || '',
 55          date_text: card.querySelector('[data-hook="review-date"]')?.textContent || '',
 56          body: card.querySelector('[data-hook="review-body"]')?.textContent || '',
 57          verified: !!card.querySelector('[data-hook="avp-badge"]'),
 58        })),
 59      }))()
 60    `);
 61  }
 62  async function readDiscussionPayload(page, input, limit) {
 63      const reviewUrl = buildDiscussionUrl(input);
 64      const reviewState = await gotoAndReadState(page, reviewUrl, 2500, 'discussion');
 65      assertUsableState(reviewState, 'discussion');
 66      const reviewPayload = await readCurrentDiscussionPayload(page, limit);
 67      if (hasDiscussionSummary(reviewPayload)) {
 68          return reviewPayload;
 69      }
 70      const productUrl = buildProductUrl(input);
 71      const productState = await gotoAndReadState(page, productUrl, 2500, 'discussion');
 72      assertUsableState(productState, 'discussion');
 73      if (isSignInState(reviewState) && isSignInState(productState)) {
 74          throw new AuthRequiredError('amazon.com', 'Amazon review discussion requires an active signed-in Amazon session in the shared Chrome profile.');
 75      }
 76      const productPayload = await readCurrentDiscussionPayload(page, limit);
 77      if (hasDiscussionSummary(productPayload)) {
 78          return productPayload;
 79      }
 80      if (isSignInState(reviewState)) {
 81          throw new CommandExecutionError('amazon review page redirected to sign-in and product page fallback did not expose review summary', 'Open the product page in Chrome, verify reviews are visible, and retry.');
 82      }
 83      return reviewPayload;
 84  }
 85  cli({
 86      site: 'amazon',
 87      name: 'discussion',
 88      description: 'Amazon review summary and sample customer discussion from product review pages',
 89      domain: 'amazon.com',
 90      strategy: Strategy.COOKIE,
 91      navigateBefore: false,
 92      args: [
 93          {
 94              name: 'input',
 95              required: true,
 96              positional: true,
 97              help: 'ASIN or product URL, for example B0FJS72893',
 98          },
 99          {
100              name: 'limit',
101              type: 'int',
102              default: 10,
103              help: 'Maximum number of review samples to return (default 10)',
104          },
105      ],
106      columns: ['asin', 'average_rating_value', 'total_review_count'],
107      func: async (page, kwargs) => {
108          const input = String(kwargs.input ?? '');
109          const limit = Math.max(1, Number(kwargs.limit) || 10);
110          const payload = await readDiscussionPayload(page, input, limit);
111          const normalized = normalizeDiscussionPayload(payload);
112          if (!normalized.average_rating_text && !normalized.total_review_count_text) {
113              throw new CommandExecutionError('amazon discussion page did not expose review summary', 'The review page may have changed or hit a robot check. Open the review page in Chrome and retry.');
114          }
115          return [normalized];
116      },
117  });
// Internal helpers exposed under `__test__` so they can be exercised directly by tests.
const discussionTestHooks = {
    normalizeDiscussionPayload,
    hasDiscussionSummary,
    isSignInState,
};
export { discussionTestHooks as __test__ };