// clis/amazon/shared.js
  1  import { ArgumentError, CommandExecutionError } from '@jackwener/opencli/errors';
// Site identity and the fixed amazon.com entry points that the URL helpers in this module build on.
export const SITE = 'amazon';
// Registrable domain that canonicalizeAmazonUrl checks hostnames against.
export const DOMAIN = 'amazon.com';
// Base URL used to resolve relative paths into absolute Amazon URLs.
export const HOME_URL = 'https://www.amazon.com/';
// Root pages of the three supported ranking lists.
export const BESTSELLERS_URL = 'https://www.amazon.com/Best-Sellers/zgbs';
export const NEW_RELEASES_URL = 'https://www.amazon.com/gp/new-releases';
export const MOVERS_SHAKERS_URL = 'https://www.amazon.com/gp/movers-and-shakers';
// Prefixes that get a URL-encoded query (search) or an ASIN (product, discussion) appended.
export const SEARCH_URL_PREFIX = 'https://www.amazon.com/s?k=';
export const PRODUCT_URL_PREFIX = 'https://www.amazon.com/dp/';
export const DISCUSSION_URL_PREFIX = 'https://www.amazon.com/product-reviews/';
// Strategy label recorded in provenance objects (see buildProvenance).
export const STRATEGY = 'cookie';
// CSS selectors for elements that may hold a product's primary price.
// NOTE(review): consumers live outside this file; presumably they are tried in order — confirm.
export const PRIMARY_PRICE_SELECTORS = [
    '#corePrice_feature_div .a-offscreen',
    '#corePriceDisplay_desktop_feature_div .a-offscreen',
    '#corePrice_desktop .a-offscreen',
    '#apex_desktop .a-offscreen',
    '#newAccordionRow_0 .a-offscreen',
    '#price_inside_buybox',
    '#priceblock_ourprice',
    '#priceblock_dealprice',
    '#tp_price_block_total_price_ww',
];
// Phrases whose presence in a page title or body text marks the page as a robot check.
// isRobotState matches them as exact, case-sensitive substrings.
const ROBOT_TEXT_PATTERNS = [
    'Sorry, we just need to make sure you\'re not a robot',
    'Enter the characters you see below',
    'Type the characters you see in this image',
    'To discuss automated access to Amazon data please contact',
];
// Per-list configuration for the ranking commands, keyed by list type.
//   commandName         - CLI command name for the list
//   rootUrl             - URL returned when no input (or 'root') is given
//   pathPattern         - regex tested against a URL's pathname to accept it for this list
//   invalidInputMessage - ArgumentError message raised for unusable input
//   invalidInputHint    - usage example passed alongside that message
const AMAZON_RANKING_SPECS = {
    bestsellers: {
        commandName: 'bestsellers',
        rootUrl: BESTSELLERS_URL,
        // Unlike the other two patterns, this one matches a "zgbs" segment anywhere in the path.
        pathPattern: /(?:^|\/)zgbs(?:\/|$)/i,
        invalidInputMessage: 'amazon bestsellers expects a best sellers URL or /zgbs path',
        invalidInputHint: 'Example: opencli amazon bestsellers https://www.amazon.com/Best-Sellers/zgbs',
    },
    new_releases: {
        commandName: 'new-releases',
        rootUrl: NEW_RELEASES_URL,
        pathPattern: /\/gp\/new-releases(?:\/|$)/i,
        invalidInputMessage: 'amazon new-releases expects a new releases URL or /gp/new-releases path',
        invalidInputHint: 'Example: opencli amazon new-releases https://www.amazon.com/gp/new-releases',
    },
    movers_shakers: {
        commandName: 'movers-shakers',
        rootUrl: MOVERS_SHAKERS_URL,
        pathPattern: /\/gp\/movers-and-shakers(?:\/|$)/i,
        invalidInputMessage: 'amazon movers-shakers expects a movers-and-shakers URL or /gp/movers-and-shakers path',
        invalidInputHint: 'Example: opencli amazon movers-shakers https://www.amazon.com/gp/movers-and-shakers',
    },
};
 52  export function cleanText(value) {
 53      return typeof value === 'string'
 54          ? value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim()
 55          : '';
 56  }
 57  export function cleanMultilineText(value) {
 58      return typeof value === 'string'
 59          ? value
 60              .replace(/\u00a0/g, ' ')
 61              .split('\n')
 62              .map((line) => line.replace(/\s+/g, ' ').trim())
 63              .filter(Boolean)
 64              .join('\n')
 65          : '';
 66  }
 67  export function uniqueNonEmpty(values) {
 68      return [...new Set(values.map((value) => cleanText(value)).filter(Boolean))];
 69  }
 70  export function buildProvenance(sourceUrl) {
 71      return {
 72          source_url: sourceUrl,
 73          fetched_at: new Date().toISOString(),
 74          strategy: STRATEGY,
 75      };
 76  }
 77  export function buildSearchUrl(query) {
 78      const normalized = cleanText(query);
 79      if (!normalized) {
 80          throw new ArgumentError('amazon search query cannot be empty');
 81      }
 82      return `${SEARCH_URL_PREFIX}${encodeURIComponent(normalized)}`;
 83  }
 84  export function extractAsin(input) {
 85      const normalized = cleanText(input);
 86      if (!normalized)
 87          return null;
 88      if (/^[A-Z0-9]{10}$/i.test(normalized)) {
 89          return normalized.toUpperCase();
 90      }
 91      const match = normalized.match(/\/(?:dp|gp\/product|product-reviews)\/([A-Z0-9]{10})/i);
 92      return match ? match[1].toUpperCase() : null;
 93  }
 94  export function buildProductUrl(input) {
 95      const asin = extractAsin(input);
 96      if (!asin) {
 97          throw new ArgumentError('amazon product expects an ASIN or product URL', 'Example: opencli amazon product B0FJS72893');
 98      }
 99      return `${PRODUCT_URL_PREFIX}${asin}`;
100  }
/**
 * Build the product-reviews page URL for an ASIN or product URL.
 *
 * @param {unknown} input - Anything extractAsin can read an ASIN from.
 * @returns {string} DISCUSSION_URL_PREFIX followed by the ASIN.
 * @throws {ArgumentError} When no ASIN can be extracted.
 */
export function buildDiscussionUrl(input) {
    const asin = extractAsin(input);
    if (asin) {
        return `${DISCUSSION_URL_PREFIX}${asin}`;
    }
    throw new ArgumentError('amazon discussion expects an ASIN or product URL', 'Example: opencli amazon discussion B0FJS72893');
}
/**
 * Look up the ranking configuration for a list type.
 *
 * @param {string} listType - Key into AMAZON_RANKING_SPECS (e.g. 'bestsellers').
 * @returns {object|undefined} The matching spec, or undefined for an unknown key.
 */
function getRankingSpec(listType) {
    const { [listType]: spec } = AMAZON_RANKING_SPECS;
    return spec;
}
/**
 * Check whether an absolute URL's path belongs to the given ranking list.
 *
 * @param {string} listType - Key into the ranking specs.
 * @param {string} inputUrl - Absolute URL to test.
 * @returns {boolean} True when the pathname matches the list's pathPattern.
 *   Returns false on any failure: an unparsable URL or an unknown list type.
 */
export function isSupportedRankingPath(listType, inputUrl) {
    try {
        const { pathname } = new URL(inputUrl);
        // Destructuring an unknown list type (undefined spec) throws and lands in the catch.
        const { pathPattern } = getRankingSpec(listType);
        return pathPattern.test(pathname);
    }
    catch {
        return false;
    }
}
/**
 * Turn user input for a ranking command into a validated, normalized ranking URL.
 *
 * Accepted inputs: empty or 'root' (the list's root URL), a site-relative path
 * starting with '/', an absolute http(s) URL, or a scheme-less host/path that
 * contains 'amazon.' and '/'.
 *
 * @param {string} listType - Key into the ranking specs (e.g. 'bestsellers').
 * @param {unknown} input - Raw user input; cleaned with cleanText first.
 * @returns {string} The list's root URL, or the input URL with ref tracking removed.
 * @throws {ArgumentError} When the input is not an Amazon URL or its path does
 *   not belong to the requested ranking list.
 */
export function resolveRankingUrl(listType, input) {
    const spec = getRankingSpec(listType);
    const normalized = cleanText(input);
    if (!normalized || normalized === 'root') {
        return spec.rootUrl;
    }
    let candidateUrl;
    if (normalized.startsWith('/')) {
        let resolved;
        try {
            resolved = new URL(normalized, HOME_URL).toString();
        }
        catch {
            throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
        }
        // Inputs such as '//other.host/zgbs' or '/\other.host/zgbs' resolve to a
        // different host, so the resolved URL gets the same host check as absolute URLs.
        candidateUrl = canonicalizeAmazonUrl(resolved);
    }
    else if (/^https?:\/\//i.test(normalized)) {
        candidateUrl = canonicalizeAmazonUrl(normalized);
    }
    else if (normalized.includes('amazon.') && normalized.includes('/')) {
        // Scheme-less host/path such as 'www.amazon.com/gp/new-releases'.
        candidateUrl = canonicalizeAmazonUrl(`https://${normalized}`);
    }
    else {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    if (!isSupportedRankingPath(listType, candidateUrl)) {
        throw new ArgumentError(spec.invalidInputMessage, spec.invalidInputHint);
    }
    return normalizeRankingInputUrl(candidateUrl);
}
/**
 * Strip tracking noise from a ranking URL.
 *
 * Removes empty path segments, path segments starting with "ref=" (any case),
 * the fragment, and the "ref" query parameter. Other query parameters, such as
 * pg=2, are preserved.
 *
 * @param {string} inputUrl - Absolute URL to normalize.
 * @returns {string} The normalized URL, or inputUrl unchanged when it cannot be parsed.
 */
function normalizeRankingInputUrl(inputUrl) {
    try {
        const parsed = new URL(inputUrl);
        const keptSegments = [];
        for (const segment of parsed.pathname.split('/')) {
            if (segment && !/^ref=/i.test(segment)) {
                keptSegments.push(segment);
            }
        }
        parsed.pathname = `/${keptSegments.join('/')}`;
        parsed.hash = '';
        // Shared ranking links often carry tracking refs that can land on unstable page variants.
        parsed.searchParams.delete('ref');
        return parsed.toString();
    }
    catch {
        return inputUrl;
    }
}
/**
 * Decide whether a link points at another page of the same ranking list.
 *
 * @param {string} listType - Key into the ranking specs.
 * @param {unknown} inputUrl - Absolute or site-relative link.
 * @returns {boolean} True when the link is on the list's path and either has a
 *   "pg" query parameter or a pagination-style "ref" value.
 */
export function isRankingPaginationUrl(listType, inputUrl) {
    const absolute = toAbsoluteAmazonUrl(inputUrl);
    if (!absolute) {
        return false;
    }
    if (!isSupportedRankingPath(listType, absolute)) {
        return false;
    }
    try {
        const { searchParams } = new URL(absolute);
        // pg= query param is the most reliable pagination indicator across all ranking lists
        if (searchParams.has('pg')) {
            return true;
        }
        const ref = cleanText(searchParams.get('ref')).toLowerCase();
        if (/(?:^|_)pg(?:_|$)/.test(ref)) {
            return true;
        }
        // Amazon ranking pagination refs: zg_bs_pg_ (bestsellers), zg_bsnr_pg_ (new releases), zg_bsms_pg_ (movers & shakers)
        return /zg_bs(?:nr|ms)?_pg_/.test(ref);
    }
    catch {
        return false;
    }
}
/**
 * Find a category (browse node) id in an Amazon URL.
 *
 * Sources are checked in order: the node / nodeid / nodeId / browseNode query
 * parameters, an "n:<digits>" entry inside the "rh" parameter, then the last
 * all-digit path segment. Only runs of at least four digits count.
 *
 * @param {unknown} inputUrl - Absolute or site-relative URL.
 * @returns {string|null} The node id as a digit string, or null when none is found.
 */
export function extractCategoryNodeId(inputUrl) {
    const absolute = toAbsoluteAmazonUrl(inputUrl);
    if (!absolute) {
        return null;
    }
    let url;
    try {
        url = new URL(absolute);
    }
    catch {
        return null;
    }
    for (const key of ['node', 'nodeid', 'nodeId', 'browseNode']) {
        const value = cleanText(url.searchParams.get(key));
        if (/^\d{4,}$/.test(value)) {
            return value;
        }
    }
    const rhValue = cleanText(url.searchParams.get('rh'));
    // searchParams.get() already percent-decodes once. The second decode only helps
    // double-encoded values, and it throws URIError on a literal '%'. Fall back to
    // the raw value in that case so the path-based lookup below still runs.
    let decodedRh = rhValue;
    try {
        decodedRh = decodeURIComponent(rhValue);
    }
    catch {
        decodedRh = rhValue;
    }
    const rhMatch = decodedRh.match(/(?:^|,)\s*n:(\d{4,})(?:,|$)/i);
    if (rhMatch) {
        return rhMatch[1];
    }
    const pathMatches = [...url.pathname.matchAll(/\/(\d{4,})(?=\/|$)/g)];
    if (pathMatches.length > 0) {
        return pathMatches[pathMatches.length - 1][1];
    }
    return null;
}
/**
 * Resolve user input against the bestsellers ranking list.
 *
 * @param {unknown} input - Raw user input, as accepted by resolveRankingUrl.
 * @returns {string} The resolved bestsellers URL.
 */
export function resolveBestsellersUrl(input) {
    const listType = 'bestsellers';
    return resolveRankingUrl(listType, input);
}
/**
 * Validate that a string is an absolute URL on DOMAIN and return its serialized form.
 *
 * @param {string} input - Absolute URL to validate.
 * @returns {string} The URL as serialized by the URL parser.
 * @throws {ArgumentError} When the input cannot be parsed, or its host is
 *   neither DOMAIN itself nor a subdomain of it.
 */
export function canonicalizeAmazonUrl(input) {
    let url;
    try {
        url = new URL(input);
    }
    catch {
        throw new ArgumentError('Invalid Amazon URL');
    }
    const host = url.hostname;
    // A bare endsWith(DOMAIN) would also accept look-alike hosts such as
    // "notamazon.com", so only an exact match or a dot-separated subdomain passes.
    const isAmazonHost = host === DOMAIN || host.endsWith(`.${DOMAIN}`);
    if (!isAmazonHost) {
        throw new ArgumentError('Invalid Amazon URL');
    }
    return url.toString();
}
/**
 * Resolve a possibly relative link against HOME_URL.
 *
 * @param {unknown} value - Absolute or relative URL text; cleaned with cleanText first.
 * @returns {string|null} The absolute URL, or null when the value is empty or
 *   cannot be parsed. The host is not checked here.
 */
export function toAbsoluteAmazonUrl(value) {
    const candidate = cleanText(value);
    if (candidate) {
        try {
            return new URL(candidate, HOME_URL).toString();
        }
        catch {
            return null;
        }
    }
    return null;
}
/**
 * Normalize a product link.
 *
 * @param {unknown} value - ASIN, product URL, or any other link.
 * @returns {string|null} The canonical /dp/ URL when an ASIN can be extracted.
 *   Otherwise the link resolved against HOME_URL, or null when that fails.
 */
export function normalizeProductUrl(value) {
    const normalized = cleanText(value);
    const asin = extractAsin(normalized);
    return asin ? buildProductUrl(asin) : toAbsoluteAmazonUrl(normalized);
}
/**
 * Parse the first $, € or £ price out of a piece of text.
 *
 * @param {unknown} text - Raw price text; cleaned with cleanText first.
 * @returns {{price_text: string|null, price_value: number|null, currency: string|null}}
 *   On a match: the symbol plus amount, the numeric value with thousands
 *   separators removed, and the ISO currency code. Without a match: the cleaned
 *   text (or null when empty) with null value and currency.
 */
export function parsePriceText(text) {
    const normalized = cleanText(text);
    const found = /([$€£])\s*(\d+(?:,\d{3})*(?:\.\d+)?)/.exec(normalized);
    if (found === null) {
        return { price_text: normalized || null, price_value: null, currency: null };
    }
    const [, symbol, amount] = found;
    const currencyBySymbol = {
        '$': 'USD',
        '€': 'EUR',
        '£': 'GBP',
    };
    const numericAmount = Number.parseFloat(amount.replace(/,/g, ''));
    return {
        price_text: `${symbol}${amount}`,
        price_value: numericAmount,
        currency: currencyBySymbol[symbol] ?? null,
    };
}
/**
 * Read the numeric rating from text such as "4.5 out of 5 stars".
 *
 * @param {unknown} text - Raw rating text; cleaned with cleanText first.
 * @returns {number|null} The number preceding "out of 5", or null when absent.
 */
export function parseRatingValue(text) {
    const found = /(\d+(?:\.\d+)?)\s*out of 5/i.exec(cleanText(text));
    if (found === null) {
        return null;
    }
    return Number.parseFloat(found[1]);
}
/**
 * Parse a review count from text such as "1,234 ratings", "2.5K" or "1.2M+".
 *
 * @param {unknown} text - Raw count text; cleaned with cleanText first.
 * @returns {number|null} The count as an integer, or null when the text holds no number.
 */
export function parseReviewCount(text) {
    const normalized = cleanText(text);
    // The trailing \b limits the compact suffix to a standalone k/m, so the first
    // letter of a following word ("1,234 more", "5 Kindle") is not read as a multiplier.
    const compactMatch = normalized.match(/(\d+(?:\.\d+)?)\s*([kKmM])\b/);
    if (compactMatch) {
        const value = Number.parseFloat(compactMatch[1]);
        const multiplier = /m/i.test(compactMatch[2]) ? 1_000_000 : 1_000;
        return Number.isFinite(value) ? Math.round(value * multiplier) : null;
    }
    // Require a leading digit so a lone comma in the text cannot match and produce NaN.
    const match = normalized.match(/\d[\d,]*/);
    return match ? Number.parseInt(match[0].replace(/,/g, ''), 10) : null;
}
/**
 * Pull the raw review-count text out of a product card's text.
 *
 * @param {unknown} text - Card text; cleaned with cleanMultilineText first.
 * @returns {string|null} The digits-and-commas run that follows "out of 5 stars".
 *   Failing that, the first line made up only of digits and commas, else null.
 */
export function extractReviewCountFromCardText(text) {
    const normalized = cleanMultilineText(text);
    const afterStars = /out of 5 stars(?:, rating details)?\s*([\d,]+)/i.exec(normalized);
    if (afterStars) {
        return afterStars[1];
    }
    for (const rawLine of normalized.split('\n')) {
        const line = cleanText(rawLine);
        if (/^[\d,]+$/.test(line)) {
            return line;
        }
    }
    return null;
}
/**
 * Check whether text mentions Amazon, ignoring case.
 *
 * @param {unknown} text - Text to inspect, such as a seller name.
 * @returns {boolean} True when the cleaned text contains "amazon".
 */
export function isAmazonEntity(text) {
    return cleanText(text).toLowerCase().includes('amazon');
}
/**
 * Return the first non-empty line of a block of text.
 *
 * @param {unknown} text - Multi-line text; cleaned with cleanMultilineText first.
 * @returns {string} The first cleaned line with content, or '' when there is none.
 */
export function firstMeaningfulLine(text) {
    for (const rawLine of cleanMultilineText(text).split('\n')) {
        const line = cleanText(rawLine);
        if (line) {
            return line;
        }
    }
    return '';
}
/**
 * Remove a leading "<rating> out of 5 stars" prefix from text.
 *
 * @param {unknown} text - Text that may begin with a rating; cleaned with cleanText first.
 * @returns {string|null} Null for empty input. Otherwise the text after the
 *   prefix, or the whole cleaned text when stripping would leave nothing.
 */
export function trimRatingPrefix(text) {
    const normalized = cleanText(text);
    if (!normalized) {
        return null;
    }
    const withoutPrefix = normalized.replace(/^\d+(?:\.\d+)?\s*out of 5 stars\s*/i, '').trim();
    return withoutPrefix || normalized;
}
/**
 * Detect whether a page state is a robot check.
 *
 * @param {{title?: unknown, body_text?: unknown}} state - Page state to inspect.
 * @returns {boolean} True when the title or body text contains any entry of
 *   ROBOT_TEXT_PATTERNS.
 */
export function isRobotState(state) {
    const title = cleanText(state.title);
    const bodyText = cleanMultilineText(state.body_text);
    for (const pattern of ROBOT_TEXT_PATTERNS) {
        if (title.includes(pattern) || bodyText.includes(pattern)) {
            return true;
        }
    }
    return false;
}
/**
 * Compose the recovery hint attached to robot-check and lost-target errors.
 *
 * @param {string} action - Name of the page or command, interpolated into the hint.
 * @returns {string} Two advice sentences joined by a single space.
 */
export function buildChallengeHint(action) {
    const clearCheckAdvice = `Open a clean Amazon ${action} page in the shared Chrome profile and clear any robot check first.`;
    const cdpAdvice = 'If you are using CDP, set OPENCLI_CDP_TARGET=amazon.com and avoid parallel Amazon commands against the same browser target.';
    return `${clearCheckAdvice} ${cdpAdvice}`;
}
/**
 * Snapshot the current page's URL, title and visible text.
 *
 * @param {object} page - Browser page handle whose evaluate(script) resolves to the script's result.
 * @returns {Promise<{href: string, title: string, body_text: string}>} Cleaned
 *   href and title (single line) and body text (multi-line).
 */
export async function readPageState(page) {
    const snapshot = await page.evaluate(`
    (() => ({
      href: window.location.href,
      title: document.title || '',
      body_text: document.body ? document.body.innerText || '' : '',
    }))()
  `);
    const { href, title, body_text: bodyText } = snapshot;
    return {
        href: cleanText(href),
        title: cleanText(title),
        body_text: cleanMultilineText(bodyText),
    };
}
/**
 * Navigate to a URL, wait briefly, and return the resulting page state.
 *
 * @param {object} page - Browser page handle providing goto, wait and evaluate.
 * @param {string} url - Destination URL.
 * @param {number} [settleMs=2500] - Passed to page.goto as the settleMs option.
 * @param {string} [action='page'] - Action name used in error messages and hints.
 * @returns {Promise<{href: string, title: string, body_text: string}>} State from readPageState.
 * @throws {CommandExecutionError} When the failure message shows the browser target was lost.
 *   Any other error is rethrown unchanged.
 */
export async function gotoAndReadState(page, url, settleMs = 2500, action = 'page') {
    try {
        await page.goto(url, { settleMs });
        await page.wait(1.5);
        return await readPageState(page);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        // Message fragments that indicate the tab navigated away or was closed mid-command.
        const lostTargetMarkers = [
            'Inspected target navigated or closed',
            'Cannot find context with specified id',
            'Target closed',
        ];
        const targetLost = lostTargetMarkers.some((marker) => message.includes(marker));
        if (!targetLost) {
            throw error;
        }
        throw new CommandExecutionError(`amazon ${action} navigation lost the current browser target`, `${buildChallengeHint(action)} If CDP is attached to a stale tab, open a fresh Amazon tab and retry.`);
    }
}
/**
 * Fail fast when a page state is a robot check.
 *
 * @param {{title?: unknown, body_text?: unknown}} state - Page state to validate.
 * @param {string} action - Action name used in the error message and hint.
 * @returns {void}
 * @throws {CommandExecutionError} When isRobotState(state) is true.
 */
export function assertUsableState(state, action) {
    if (isRobotState(state)) {
        throw new CommandExecutionError(`amazon ${action} hit a robot check`, buildChallengeHint(action));
    }
}
// Bundle of this module's helpers and PRIMARY_PRICE_SELECTORS under one export.
// NOTE(review): the name suggests it exists for unit tests — confirm against the test suite.
export const __test__ = {
    buildSearchUrl,
    extractAsin,
    buildProductUrl,
    buildDiscussionUrl,
    resolveBestsellersUrl,
    resolveRankingUrl,
    isSupportedRankingPath,
    isRankingPaginationUrl,
    extractCategoryNodeId,
    parsePriceText,
    parseRatingValue,
    parseReviewCount,
    extractReviewCountFromCardText,
    isAmazonEntity,
    trimRatingPrefix,
    isRobotState,
    PRIMARY_PRICE_SELECTORS,
};