discussion.js
import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import {
    buildProductUrl,
    buildDiscussionUrl,
    buildProvenance,
    cleanText,
    extractAsin,
    normalizeProductUrl,
    parseRatingValue,
    parseReviewCount,
    trimRatingPrefix,
    uniqueNonEmpty,
    assertUsableState,
    gotoAndReadState,
} from './shared.js';

/**
 * Shapes one raw review card (as scraped by the in-page script) into an
 * output record. Text fields that come back empty after cleaning become
 * `null`.
 *
 * @param {object} sample - Raw card with `title`, `rating_text`, `author`,
 *   `date_text`, `body` and `verified` fields.
 * @returns {object} Normalized review sample.
 */
function normalizeReviewSample(sample) {
    return {
        title: trimRatingPrefix(sample.title) || null,
        rating_text: cleanText(sample.rating_text) || null,
        // The raw (uncleaned) rating text is what gets parsed here.
        rating_value: parseRatingValue(sample.rating_text),
        author: cleanText(sample.author) || null,
        date_text: cleanText(sample.date_text) || null,
        body: cleanText(sample.body) || null,
        // Only a literal `true` counts; any other value yields false.
        verified_purchase: sample.verified === true,
    };
}

/**
 * Turns the raw payload scraped from a page into the single record returned
 * by the `discussion` command.
 *
 * @param {object} payload - Raw payload as produced by
 *   `readCurrentDiscussionPayload` (`href`, `average_rating_text`,
 *   `total_review_count_text`, `qa_links`, `review_samples`).
 * @returns {object} Record with the ASIN, product/discussion URLs, the
 *   fields contributed by `buildProvenance`, the rating summary, Q&A URLs
 *   and normalized review samples.
 */
function normalizeDiscussionPayload(payload) {
    const rawHref = payload.href ?? '';
    // Prefer the URL the page reported; otherwise derive one from the raw href.
    const discussionUrl = cleanText(payload.href) || buildDiscussionUrl(rawHref);
    const asin = extractAsin(rawHref) ?? null;
    const ratingText = cleanText(payload.average_rating_text) || null;
    const reviewCountText = cleanText(payload.total_review_count_text) || null;
    const provenance = buildProvenance(discussionUrl);
    const rawSamples = payload.review_samples ?? [];

    // Key order is significant: provenance fields are spread after the URL
    // fields and before the summary fields, so later keys win on collision.
    return {
        asin,
        product_url: asin ? normalizeProductUrl(asin) : null,
        discussion_url: discussionUrl,
        ...provenance,
        average_rating_text: ratingText,
        average_rating_value: parseRatingValue(ratingText),
        total_review_count_text: reviewCountText,
        total_review_count: parseReviewCount(reviewCountText),
        qa_urls: uniqueNonEmpty(payload.qa_links ?? []),
        review_samples: rawSamples.map((sample) => normalizeReviewSample(sample)),
    };
}

/**
 * Reports whether a raw payload carries any review summary text.
 *
 * @param {object} payload - Raw scraped payload.
 * @returns {boolean} True when the cleaned average-rating text or the
 *   cleaned total-review-count text is non-empty.
 */
function hasDiscussionSummary(payload) {
    const summaryFields = ['average_rating_text', 'total_review_count_text'];
    return summaryFields.some((field) => Boolean(cleanText(payload[field])));
}

/**
 * Detects whether a page state looks like Amazon's sign-in page, using a
 * case-insensitive match on its URL and title.
 *
 * @param {object} state - Page state exposing `href` and `title`.
 * @returns {boolean} True when the URL contains `/ap/signin` or the title
 *   contains `amazon sign-in`.
 */
function isSignInState(state) {
    // Both fields are normalized up front, before either is inspected.
    const [href, title] = [state.href, state.title].map((value) => cleanText(value).toLowerCase());
    if (href.includes('/ap/signin')) {
        return true;
    }
    return title.includes('amazon sign-in');
}

/**
 * Scrapes the review summary, Q&A links and up to `limit` review cards from
 * whatever document the page currently shows.
 *
 * @param {object} page - Browser page handle exposing `evaluate(script)`.
 * @param {number} limit - Maximum number of review cards to collect; it is
 *   interpolated verbatim into the evaluated script.
 * @returns {Promise<object>} Raw payload (`href`, `title`,
 *   `average_rating_text`, `total_review_count_text`, `qa_links`,
 *   `review_samples`).
 */
async function readCurrentDiscussionPayload(page, limit) {
    const script = `
    (() => ({
      href: window.location.href,
      title: document.title || '',
      average_rating_text: document.querySelector('[data-hook="rating-out-of-text"]')?.textContent || '',
      total_review_count_text: document.querySelector('[data-hook="total-review-count"]')?.textContent || '',
      qa_links: Array.from(document.querySelectorAll('a[href*="ask/questions"]')).map((anchor) => anchor.href || ''),
      review_samples: Array.from(document.querySelectorAll('[data-hook="review"]')).slice(0, ${limit}).map((card) => ({
        title: card.querySelector('[data-hook="review-title"]')?.textContent || '',
        rating_text:
          card.querySelector('[data-hook="review-star-rating"]')?.textContent
          || card.querySelector('[data-hook="cmps-review-star-rating"]')?.textContent
          || '',
        author: card.querySelector('.a-profile-name')?.textContent || '',
        date_text: card.querySelector('[data-hook="review-date"]')?.textContent || '',
        body: card.querySelector('[data-hook="review-body"]')?.textContent || '',
        verified: !!card.querySelector('[data-hook="avp-badge"]'),
      })),
    }))()
  `;
    const scraped = await page.evaluate(script);
    return scraped;
}

/**
 * Navigates to `url`, reads the resulting page state and runs the shared
 * usability assertion on it.
 *
 * @param {object} page - Browser page handle.
 * @param {string} url - Address to open.
 * @returns {Promise<object>} The state returned by `gotoAndReadState`.
 */
async function openDiscussionState(page, url) {
    // NOTE(review): 2500 is presumably a settle delay in ms — confirm in shared.js.
    const state = await gotoAndReadState(page, url, 2500, 'discussion');
    assertUsableState(state, 'discussion');
    return state;
}

/**
 * Loads review data for `input`, trying the dedicated review page first and
 * falling back to the product page when no summary is found there.
 *
 * @param {object} page - Browser page handle.
 * @param {string} input - ASIN or product URL.
 * @param {number} limit - Maximum number of review samples to scrape.
 * @returns {Promise<object>} The first payload that has a review summary;
 *   if neither page has one and the review page was not a sign-in page,
 *   the (summary-less) review-page payload.
 * @throws {AuthRequiredError} When both pages look like the sign-in page.
 * @throws {CommandExecutionError} When the review page looked like sign-in
 *   and the product page exposed no summary.
 */
async function readDiscussionPayload(page, input, limit) {
    const reviewState = await openDiscussionState(page, buildDiscussionUrl(input));
    const reviewPayload = await readCurrentDiscussionPayload(page, limit);
    if (hasDiscussionSummary(reviewPayload)) {
        return reviewPayload;
    }

    // Fallback: the product page is tried when the review page has no summary.
    const productState = await openDiscussionState(page, buildProductUrl(input));
    const reviewWasSignIn = isSignInState(reviewState);
    if (reviewWasSignIn && isSignInState(productState)) {
        throw new AuthRequiredError('amazon.com', 'Amazon review discussion requires an active signed-in Amazon session in the shared Chrome profile.');
    }

    const productPayload = await readCurrentDiscussionPayload(page, limit);
    if (hasDiscussionSummary(productPayload)) {
        return productPayload;
    }
    if (reviewWasSignIn) {
        throw new CommandExecutionError('amazon review page redirected to sign-in and product page fallback did not expose review summary', 'Open the product page in Chrome, verify reviews are visible, and retry.');
    }
    // Neither page had a summary; the caller decides how to report that.
    return reviewPayload;
}

// Argument declarations for the `amazon discussion` command.
const discussionArgs = [
    {
        name: 'input',
        required: true,
        positional: true,
        help: 'ASIN or product URL, for example B0FJS72893',
    },
    {
        name: 'limit',
        type: 'int',
        default: 10,
        help: 'Maximum number of review samples to return (default 10)',
    },
];

// Registers the `amazon discussion` command. The handler performs its own
// navigation through readDiscussionPayload.
// NOTE(review): `navigateBefore: false` presumably stops the framework from
// navigating before the handler runs — confirm against the registry docs.
cli({
    site: 'amazon',
    name: 'discussion',
    description: 'Amazon review summary and sample customer discussion from product review pages',
    domain: 'amazon.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: discussionArgs,
    columns: ['asin', 'average_rating_value', 'total_review_count'],
    func: async (page, kwargs) => {
        const input = String(kwargs.input ?? '');
        // A limit that coerces to 0 or NaN falls back to 10; any other value
        // below 1 is raised to 1.
        const requestedLimit = Number(kwargs.limit);
        const limit = Math.max(1, requestedLimit || 10);

        const rawPayload = await readDiscussionPayload(page, input, limit);
        const normalized = normalizeDiscussionPayload(rawPayload);
        const hasSummary = Boolean(normalized.average_rating_text || normalized.total_review_count_text);
        if (!hasSummary) {
            throw new CommandExecutionError('amazon discussion page did not expose review summary', 'The review page may have changed or hit a robot check. Open the review page in Chrome and retry.');
        }
        return [normalized];
    },
});

// Internal helpers exposed for unit tests only.
export const __test__ = {
    normalizeDiscussionPayload,
    hasDiscussionSummary,
    isSignInState,
};