shared.js
import { ArgumentError, CommandExecutionError } from '@jackwener/opencli/errors';

/** Site identifier. */
export const SITE = 'amazon';
/** Registrable domain that every accepted Amazon URL must belong to. */
export const DOMAIN = 'amazon.com';
/** Base URL used to resolve relative paths and hrefs. */
export const HOME_URL = 'https://www.amazon.com/';
export const BESTSELLERS_URL = 'https://www.amazon.com/Best-Sellers/zgbs';
export const NEW_RELEASES_URL = 'https://www.amazon.com/gp/new-releases';
export const MOVERS_SHAKERS_URL = 'https://www.amazon.com/gp/movers-and-shakers';
export const SEARCH_URL_PREFIX = 'https://www.amazon.com/s?k=';
export const PRODUCT_URL_PREFIX = 'https://www.amazon.com/dp/';
export const DISCUSSION_URL_PREFIX = 'https://www.amazon.com/product-reviews/';
/** Strategy label recorded in provenance metadata (see buildProvenance). */
export const STRATEGY = 'cookie';

/**
 * CSS selectors for price elements. Not consumed inside this module; it is
 * exported for other modules.
 * NOTE(review): the order presumably expresses lookup preference - confirm with callers.
 */
export const PRIMARY_PRICE_SELECTORS = [
    '#corePrice_feature_div .a-offscreen',
    '#corePriceDisplay_desktop_feature_div .a-offscreen',
    '#corePrice_desktop .a-offscreen',
    '#apex_desktop .a-offscreen',
    '#newAccordionRow_0 .a-offscreen',
    '#price_inside_buybox',
    '#priceblock_ourprice',
    '#priceblock_dealprice',
    '#tp_price_block_total_price_ww',
];

/** Substrings whose presence in the page title or body marks a robot-check page. */
const ROBOT_TEXT_PATTERNS = [
    'Sorry, we just need to make sure you\'re not a robot',
    'Enter the characters you see below',
    'Type the characters you see in this image',
    'To discuss automated access to Amazon data please contact',
];

/** Error-message fragments that gotoAndReadState treats as "the browser target went away". */
const LOST_TARGET_MESSAGE_FRAGMENTS = [
    'Inspected target navigated or closed',
    'Cannot find context with specified id',
    'Target closed',
];

/**
 * Per-list configuration for the ranking commands, keyed by list type.
 * `pathPattern` is tested against a URL pathname; none of the patterns use the
 * `g` flag, so `.test()` carries no state between calls.
 */
const AMAZON_RANKING_SPECS = {
    bestsellers: {
        commandName: 'bestsellers',
        rootUrl: BESTSELLERS_URL,
        pathPattern: /(?:^|\/)zgbs(?:\/|$)/i,
        invalidInputMessage: 'amazon bestsellers expects a best sellers URL or /zgbs path',
        invalidInputHint: 'Example: opencli amazon bestsellers https://www.amazon.com/Best-Sellers/zgbs',
    },
    new_releases: {
        commandName: 'new-releases',
        rootUrl: NEW_RELEASES_URL,
        pathPattern: /\/gp\/new-releases(?:\/|$)/i,
        invalidInputMessage: 'amazon new-releases expects a new releases URL or /gp/new-releases path',
        invalidInputHint: 'Example: opencli amazon new-releases https://www.amazon.com/gp/new-releases',
    },
    movers_shakers: {
        commandName: 'movers-shakers',
        rootUrl: MOVERS_SHAKERS_URL,
        pathPattern: /\/gp\/movers-and-shakers(?:\/|$)/i,
        invalidInputMessage: 'amazon movers-shakers expects a movers-and-shakers URL or /gp/movers-and-shakers path',
        invalidInputHint: 'Example: opencli amazon movers-shakers https://www.amazon.com/gp/movers-and-shakers',
    },
};

/**
 * Collapse a string onto one line: non-breaking spaces become regular spaces,
 * whitespace runs become a single space, and the ends are trimmed.
 *
 * @param {unknown} value
 * @returns {string} The cleaned text, or '' when `value` is not a string.
 */
export function cleanText(value) {
    return typeof value === 'string'
        ? value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim()
        : '';
}

/**
 * Like cleanText, but keeps line structure: each line is cleaned on its own
 * and empty lines are dropped.
 *
 * @param {unknown} value
 * @returns {string} Cleaned lines joined with '\n', or '' when `value` is not a string.
 */
export function cleanMultilineText(value) {
    return typeof value === 'string'
        ? value
            .replace(/\u00a0/g, ' ')
            .split('\n')
            .map((line) => line.replace(/\s+/g, ' ').trim())
            .filter(Boolean)
            .join('\n')
        : '';
}

/**
 * Clean every entry with cleanText, drop the empty ones and de-duplicate,
 * keeping first-seen order.
 *
 * @param {unknown[]} values
 * @returns {string[]}
 */
export function uniqueNonEmpty(values) {
    return [...new Set(values.map((value) => cleanText(value)).filter(Boolean))];
}

/**
 * Build the provenance record attached to results.
 *
 * @param {string} sourceUrl
 * @returns {{source_url: string, fetched_at: string, strategy: string}}
 *   `fetched_at` is the current time as an ISO-8601 string.
 */
export function buildProvenance(sourceUrl) {
    return {
        source_url: sourceUrl,
        fetched_at: new Date().toISOString(),
        strategy: STRATEGY,
    };
}

/**
 * Build a search URL for a free-text query.
 *
 * @param {string} query
 * @returns {string}
 * @throws {ArgumentError} When the query is empty after cleaning.
 */
export function buildSearchUrl(query) {
    const normalized = cleanText(query);
    if (!normalized) {
        throw new ArgumentError('amazon search query cannot be empty');
    }
    return `${SEARCH_URL_PREFIX}${encodeURIComponent(normalized)}`;
}

/**
 * Extract a 10-character alphanumeric ASIN from a bare ASIN or from a URL/path
 * containing `/dp/`, `/gp/product/` or `/product-reviews/`.
 *
 * @param {unknown} input
 * @returns {string|null} Upper-cased ASIN, or null when none is found.
 */
export function extractAsin(input) {
    const normalized = cleanText(input);
    if (!normalized) {
        return null;
    }
    if (/^[A-Z0-9]{10}$/i.test(normalized)) {
        return normalized.toUpperCase();
    }
    const match = normalized.match(/\/(?:dp|gp\/product|product-reviews)\/([A-Z0-9]{10})/i);
    return match ? match[1].toUpperCase() : null;
}

/**
 * Build the canonical product URL for an ASIN or product URL.
 *
 * @param {string} input
 * @returns {string}
 * @throws {ArgumentError} When no ASIN can be extracted.
 */
export function buildProductUrl(input) {
    const asin = extractAsin(input);
    if (!asin) {
        throw new ArgumentError('amazon product expects an ASIN or product URL', 'Example: opencli amazon product B0FJS72893');
    }
    return `${PRODUCT_URL_PREFIX}${asin}`;
}

/**
 * Build the product-reviews URL for an ASIN or product URL.
 *
 * @param {string} input
 * @returns {string}
 * @throws {ArgumentError} When no ASIN can be extracted.
 */
export function buildDiscussionUrl(input) {
    const asin = extractAsin(input);
    if (!asin) {
        throw new ArgumentError('amazon discussion expects an ASIN or product URL', 'Example: opencli amazon discussion B0FJS72893');
    }
    return `${DISCUSSION_URL_PREFIX}${asin}`;
}

/**
 * Look up the ranking spec for a list type.
 * Only own keys are honoured so that names such as 'constructor' do not
 * resolve to inherited Object.prototype members.
 *
 * @param {string} listType
 * @returns {object|undefined}
 */
function getRankingSpec(listType) {
    return Object.prototype.hasOwnProperty.call(AMAZON_RANKING_SPECS, listType)
        ? AMAZON_RANKING_SPECS[listType]
        : undefined;
}

/**
 * Decode a percent-encoded string, returning the input unchanged when it is
 * malformed instead of throwing URIError.
 *
 * @param {string} value
 * @returns {string}
 */
function safeDecodeURIComponent(value) {
    try {
        return decodeURIComponent(value);
    }
    catch {
        return value;
    }
}

/**
 * Tell whether the pathname of an absolute URL matches the given ranking list.
 *
 * @param {string} listType Key of AMAZON_RANKING_SPECS.
 * @param {string} inputUrl Absolute URL.
 * @returns {boolean} false for unknown list types and unparsable URLs.
 */
export function isSupportedRankingPath(listType, inputUrl) {
    const spec = getRankingSpec(listType);
    if (!spec) {
        return false;
    }
    try {
        return spec.pathPattern.test(new URL(inputUrl).pathname);
    }
    catch {
        return false;
    }
}

/**
 * Resolve user input (empty, 'root', a path, or a URL with or without scheme)
 * to a normalized ranking URL on the Amazon domain.
 *
 * @param {string} listType Key of AMAZON_RANKING_SPECS.
 * @param {string} [input]
 * @returns {string}
 * @throws {ArgumentError} For an unknown list type, a non-Amazon host, or a
 *   path that does not belong to the requested ranking list.
 */
export function resolveRankingUrl(listType, input) {
    const spec = getRankingSpec(listType);
    if (!spec) {
        throw new ArgumentError(`Unsupported amazon ranking list type: ${String(listType)}`);
    }
    const normalized = cleanText(input);
    if (!normalized || normalized === 'root') {
        return spec.rootUrl;
    }
    let candidateUrl;
    if (normalized.startsWith('/')) {
        const resolved = toAbsoluteAmazonUrl(normalized);
        if (!resolved) {
            throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
        }
        // A scheme-relative input ("//other.host/zgbs") resolves to a foreign host,
        // so the resolved URL goes through the same domain check as full URLs.
        candidateUrl = canonicalizeAmazonUrl(resolved);
    }
    else if (/^https?:\/\//i.test(normalized)) {
        candidateUrl = canonicalizeAmazonUrl(normalized);
    }
    else if (normalized.includes('amazon.') && normalized.includes('/')) {
        // Scheme-less URL such as "www.amazon.com/gp/new-releases".
        candidateUrl = canonicalizeAmazonUrl(`https://${normalized}`);
    }
    else {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    if (!isSupportedRankingPath(listType, candidateUrl)) {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    return normalizeRankingInputUrl(candidateUrl);
}

/**
 * Strip tracking noise from a ranking URL: `ref=...` path segments, the `ref`
 * query parameter and the fragment. Other query parameters are kept.
 *
 * @param {string} inputUrl
 * @returns {string} The normalized URL, or `inputUrl` unchanged when it cannot be parsed.
 */
function normalizeRankingInputUrl(inputUrl) {
    try {
        const url = new URL(inputUrl);
        const normalizedPathSegments = url.pathname
            .split('/')
            .filter(Boolean)
            .filter((segment) => !/^ref=/i.test(segment));
        url.pathname = `/${normalizedPathSegments.join('/')}`;
        url.hash = '';
        // Ranking pages are frequently shared with tracking refs that can land on unstable variants.
        // Dropping ref keeps the canonical ranking path while preserving useful params (for example pg=2).
        url.searchParams.delete('ref');
        return url.toString();
    }
    catch {
        return inputUrl;
    }
}

/**
 * Tell whether a link points at another page of the same ranking list.
 *
 * @param {string} listType Key of AMAZON_RANKING_SPECS.
 * @param {string} inputUrl Absolute or relative URL.
 * @returns {boolean}
 */
export function isRankingPaginationUrl(listType, inputUrl) {
    const absolute = toAbsoluteAmazonUrl(inputUrl);
    if (!absolute || !isSupportedRankingPath(listType, absolute)) {
        return false;
    }
    try {
        const url = new URL(absolute);
        const ref = cleanText(url.searchParams.get('ref')).toLowerCase();
        // pg= query param is the most reliable pagination indicator across all ranking lists
        return url.searchParams.has('pg')
            || /(?:^|_)pg(?:_|$)/.test(ref)
            // Amazon ranking pagination refs: zg_bs_pg_ (bestsellers), zg_bsnr_pg_ (new releases), zg_bsms_pg_ (movers & shakers)
            || /zg_bs(?:nr|ms)?_pg_/.test(ref);
    }
    catch {
        return false;
    }
}

/**
 * Extract a numeric category node id (4+ digits) from a URL. Sources are tried
 * in order: the `node`/`nodeid`/`nodeId`/`browseNode` query parameters, an
 * `n:<id>` entry inside the `rh` parameter, then the last all-digit path segment.
 *
 * @param {string} inputUrl Absolute or relative URL.
 * @returns {string|null}
 */
export function extractCategoryNodeId(inputUrl) {
    const absolute = toAbsoluteAmazonUrl(inputUrl);
    if (!absolute) {
        return null;
    }
    try {
        const url = new URL(absolute);
        for (const key of ['node', 'nodeid', 'nodeId', 'browseNode']) {
            const value = cleanText(url.searchParams.get(key));
            if (/^\d{4,}$/.test(value)) {
                return value;
            }
        }
        // searchParams.get() already percent-decodes once; the extra decode handles
        // double-encoded values. It must not throw on a stray '%', otherwise the
        // path-based fallback below would never run.
        const rhValue = safeDecodeURIComponent(cleanText(url.searchParams.get('rh')));
        const rhMatch = rhValue.match(/(?:^|,)\s*n:(\d{4,})(?:,|$)/i);
        if (rhMatch) {
            return rhMatch[1];
        }
        const pathMatches = [...url.pathname.matchAll(/\/(\d{4,})(?=\/|$)/g)];
        if (pathMatches.length > 0) {
            return pathMatches[pathMatches.length - 1][1];
        }
    }
    catch {
        return null;
    }
    return null;
}

/**
 * Shorthand for resolveRankingUrl('bestsellers', input).
 *
 * @param {string} [input]
 * @returns {string}
 * @throws {ArgumentError}
 */
export function resolveBestsellersUrl(input) {
    return resolveRankingUrl('bestsellers', input);
}

/**
 * Parse an absolute URL and require its host to be DOMAIN or a subdomain of it.
 *
 * @param {string} input
 * @returns {string} The serialized URL.
 * @throws {ArgumentError} When the input is not a URL or is on another host.
 */
export function canonicalizeAmazonUrl(input) {
    try {
        const url = new URL(input);
        const host = url.hostname;
        // A bare endsWith(DOMAIN) would also accept look-alike hosts such as
        // "evilamazon.com"; require an exact match or a dot-delimited subdomain.
        if (host !== DOMAIN && !host.endsWith(`.${DOMAIN}`)) {
            throw new Error('not-amazon');
        }
        return url.toString();
    }
    catch {
        throw new ArgumentError('Invalid Amazon URL');
    }
}

/**
 * Resolve a possibly-relative URL against HOME_URL.
 * Absolute URLs are returned as parsed; no host check is performed here.
 *
 * @param {unknown} value
 * @returns {string|null} null for empty or unparsable input.
 */
export function toAbsoluteAmazonUrl(value) {
    const normalized = cleanText(value);
    if (!normalized) {
        return null;
    }
    try {
        return new URL(normalized, HOME_URL).toString();
    }
    catch {
        return null;
    }
}

/**
 * Reduce a product link to its canonical `/dp/<ASIN>` form when an ASIN can be
 * found; otherwise fall back to the absolute form of the input.
 *
 * @param {unknown} value
 * @returns {string|null}
 */
export function normalizeProductUrl(value) {
    const normalized = cleanText(value);
    const asin = extractAsin(normalized);
    if (asin) {
        return buildProductUrl(asin);
    }
    return toAbsoluteAmazonUrl(normalized);
}

/**
 * Parse the first `$`, `€` or `£` amount found in a text.
 *
 * @param {unknown} text
 * @returns {{price_text: string|null, price_value: number|null, currency: string|null}}
 *   When no amount is found, `price_text` is the cleaned input (or null when
 *   empty) and the other fields are null.
 */
export function parsePriceText(text) {
    const normalized = cleanText(text);
    const match = normalized.match(/([$€£])\s*(\d+(?:,\d{3})*(?:\.\d+)?)/);
    if (!match) {
        return {
            price_text: normalized || null,
            price_value: null,
            currency: null,
        };
    }
    const currencyMap = {
        '$': 'USD',
        '€': 'EUR',
        '£': 'GBP',
    };
    return {
        price_text: `${match[1]}${match[2]}`,
        price_value: Number.parseFloat(match[2].replace(/,/g, '')),
        currency: currencyMap[match[1]] ?? null,
    };
}

/**
 * Parse the number in front of "out of 5".
 *
 * @param {unknown} text
 * @returns {number|null}
 */
export function parseRatingValue(text) {
    const normalized = cleanText(text);
    const match = normalized.match(/(\d+(?:\.\d+)?)\s*out of 5/i);
    return match ? Number.parseFloat(match[1]) : null;
}

/**
 * Parse a review count written either compactly ("1.2K", "3M") or as a plain
 * number with optional thousands separators ("1,234").
 *
 * @param {unknown} text
 * @returns {number|null} null when the text contains no digits.
 */
export function parseReviewCount(text) {
    const normalized = cleanText(text);
    // The K/M suffix must not be the first letter of a following word; otherwise
    // "5 more reviews" would be read as 5 million.
    const compactMatch = normalized.match(/(\d+(?:\.\d+)?)\s*([km])(?![a-z])/i);
    if (compactMatch) {
        const value = Number.parseFloat(compactMatch[1]);
        const multiplier = compactMatch[2].toLowerCase() === 'm' ? 1_000_000 : 1_000;
        return Number.isFinite(value) ? Math.round(value * multiplier) : null;
    }
    // Anchor on a digit so that a lone comma in prose cannot match and yield NaN.
    const match = normalized.match(/\d[\d,]*/);
    return match ? Number.parseInt(match[0].replace(/,/g, ''), 10) : null;
}

/**
 * Find the raw review-count token in the text of a product card: first the
 * number following "out of 5 stars", otherwise the first line made only of
 * digits and commas.
 *
 * @param {unknown} text
 * @returns {string|null} The unparsed token (for example "1,234").
 */
export function extractReviewCountFromCardText(text) {
    const normalized = cleanMultilineText(text);
    const match = normalized.match(/out of 5 stars(?:, rating details)?\s*([\d,]+)/i);
    if (match) {
        return match[1];
    }
    const numericLine = normalized
        .split('\n')
        .map((line) => cleanText(line))
        .find((line) => /^[\d,]+$/.test(line));
    return numericLine ?? null;
}

/**
 * Tell whether a text mentions "amazon", case-insensitively.
 *
 * @param {unknown} text
 * @returns {boolean}
 */
export function isAmazonEntity(text) {
    return cleanText(text).toLowerCase().includes('amazon');
}

/**
 * Return the first non-empty line of a text.
 *
 * @param {unknown} text
 * @returns {string} '' when there is none.
 */
export function firstMeaningfulLine(text) {
    return cleanMultilineText(text)
        .split('\n')
        .map((line) => cleanText(line))
        .find(Boolean)
        ?? '';
}

/**
 * Remove a leading "<n> out of 5 stars" prefix.
 *
 * @param {unknown} text
 * @returns {string|null} null for empty input. When nothing remains after the
 *   prefix, the full cleaned text is returned.
 */
export function trimRatingPrefix(text) {
    const normalized = cleanText(text);
    if (!normalized) {
        return null;
    }
    return normalized.replace(/^\d+(?:\.\d+)?\s*out of 5 stars\s*/i, '').trim() || normalized;
}

/**
 * Detect a robot-check page from its title or body text.
 *
 * @param {{title?: unknown, body_text?: unknown}} state
 * @returns {boolean}
 */
export function isRobotState(state) {
    const title = cleanText(state.title);
    const bodyText = cleanMultilineText(state.body_text);
    return ROBOT_TEXT_PATTERNS.some((pattern) => title.includes(pattern) || bodyText.includes(pattern));
}

/**
 * Build the hint shown alongside robot-check and lost-target errors.
 *
 * @param {string} action Name of the action, interpolated into the hint.
 * @returns {string}
 */
export function buildChallengeHint(action) {
    return [
        `Open a clean Amazon ${action} page in the shared Chrome profile and clear any robot check first.`,
        'If you are using CDP, set OPENCLI_CDP_TARGET=amazon.com and avoid parallel Amazon commands against the same browser target.',
    ].join(' ');
}

/**
 * Read the URL, title and visible text of the current page.
 *
 * @param {{evaluate: Function}} page Browser page handle exposing `evaluate(script)`.
 * @returns {Promise<{href: string, title: string, body_text: string}>}
 */
export async function readPageState(page) {
    const result = (await page.evaluate(`
      (() => ({
        href: window.location.href,
        title: document.title || '',
        body_text: document.body ? document.body.innerText || '' : '',
      }))()
    `)) ?? {};
    // A nullish evaluate() result degrades to empty strings instead of a TypeError.
    return {
        href: cleanText(result.href),
        title: cleanText(result.title),
        body_text: cleanMultilineText(result.body_text),
    };
}

/**
 * Navigate to a URL, wait briefly, and return the resulting page state.
 *
 * @param {{goto: Function, wait: Function, evaluate: Function}} page
 * @param {string} url
 * @param {number} [settleMs=2500] Passed through to `page.goto` as `{ settleMs }`.
 * @param {string} [action='page'] Action name used in error messages.
 * @returns {Promise<{href: string, title: string, body_text: string}>}
 * @throws {CommandExecutionError} When the error message indicates that the
 *   browser target was closed or replaced. Any other error is rethrown as is.
 */
export async function gotoAndReadState(page, url, settleMs = 2500, action = 'page') {
    try {
        await page.goto(url, { settleMs });
        // NOTE(review): the unit of page.wait() is not visible here (presumably seconds) - confirm.
        await page.wait(1.5);
        return await readPageState(page);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        if (LOST_TARGET_MESSAGE_FRAGMENTS.some((fragment) => message.includes(fragment))) {
            throw new CommandExecutionError(`amazon ${action} navigation lost the current browser target`, `${buildChallengeHint(action)} If CDP is attached to a stale tab, open a fresh Amazon tab and retry.`);
        }
        throw error;
    }
}

/**
 * Throw when the page state is a robot check; otherwise do nothing.
 *
 * @param {{title?: unknown, body_text?: unknown}} state
 * @param {string} action Action name used in the error message.
 * @throws {CommandExecutionError}
 */
export function assertUsableState(state, action) {
    if (!isRobotState(state)) {
        return;
    }
    throw new CommandExecutionError(`amazon ${action} hit a robot check`, buildChallengeHint(action));
}

/** Helpers re-exported as one object for unit tests. */
export const __test__ = {
    buildSearchUrl,
    extractAsin,
    buildProductUrl,
    buildDiscussionUrl,
    resolveBestsellersUrl,
    resolveRankingUrl,
    isSupportedRankingPath,
    isRankingPaginationUrl,
    extractCategoryNodeId,
    parsePriceText,
    parseRatingValue,
    parseReviewCount,
    extractReviewCountFromCardText,
    isAmazonEntity,
    trimRatingPrefix,
    isRobotState,
    PRIMARY_PRICE_SELECTORS,
};