// clis/coupang/search.js
  1  import { cli, Strategy } from '@jackwener/opencli/registry';
  2  import { mergeSearchItems, normalizeSearchItem, sanitizeSearchItems } from './utils.js';
  3  function escapeJsString(value) {
  4      return JSON.stringify(value);
  5  }
  6  function buildApplyFilterEvaluate(filter) {
  7      return `
  8      () => {
  9        const filter = ${escapeJsString(filter)};
 10        const labels = Array.from(document.querySelectorAll('label'));
 11        const normalize = (value) => (value == null ? '' : String(value).trim().toLowerCase());
 12        const target = labels.find((label) => {
 13          const component = normalize(label.getAttribute('data-component-name'));
 14          const imgAlt = normalize(label.querySelector('img')?.getAttribute('alt'));
 15          const text = normalize(label.textContent);
 16  
 17          if (filter === 'rocket') {
 18            return (
 19              component.includes('deliveryfilteroption-rocket_luxury,rocket_wow,coupang_global') ||
 20              imgAlt.includes('rocket_luxury,rocket_wow,coupang_global') ||
 21              imgAlt.includes('rocket-all') ||
 22              text.includes('로켓')
 23            );
 24          }
 25  
 26          return component.includes(filter) || imgAlt.includes(filter) || text.includes(filter);
 27        });
 28  
 29        if (!target) {
 30          return { ok: false, reason: 'FILTER_NOT_FOUND' };
 31        }
 32  
 33        target.click();
 34  
 35        return {
 36          ok: true,
 37          reason: 'FILTER_CLICKED',
 38          component: target.getAttribute('data-component-name') || '',
 39          text: (target.textContent || '').trim(),
 40          alt: target.querySelector('img')?.getAttribute('alt') || '',
 41        };
 42      }
 43    `;
 44  }
 45  function buildCurrentLocationEvaluate() {
 46      return `
 47      () => ({
 48        href: location.href
 49      })
 50    `;
 51  }
 52  function buildSearchEvaluate(query, limit, pageNumber) {
 53      return `
 54      (async () => {
 55        const query = ${escapeJsString(query)};
 56        const limit = ${limit};
 57        const pageNumber = ${pageNumber};
 58  
 59        const normalizeText = (value) => (value == null ? '' : String(value).trim());
 60        const parseNum = (value) => {
 61          const text = normalizeText(value).replace(/[^\\d.]/g, '');
 62          if (!text) return null;
 63          const num = Number(text);
 64          return Number.isFinite(num) ? num : null;
 65        };
 66        const extractPriceFromText = (text) => {
 67          const matches = normalizeText(text).match(/\\d{1,3}(?:,\\d{3})*원/g) || [];
 68          if (!matches.length) return '';
 69          if (matches.length >= 2) return matches[matches.length - 2];
 70          return matches[0];
 71        };
 72        const extractPriceInfo = (root) => {
 73          const priceArea =
 74            root.querySelector('.PriceArea_priceArea__NntJz, [class*="PriceArea_priceArea"], [class*="priceArea"]') ||
 75            root;
 76          const priceAreaText = normalizeText(priceArea.textContent || '');
 77          const originalPrice = normalizeText(
 78            priceArea.querySelector(
 79              'del, .base-price, .origin-price, .original-price, .strike-price, [class*="base-price"], [class*="origin-price"], [class*="line-through"]'
 80            )?.textContent || ''
 81          );
 82          const originalPriceNum = parseNum(originalPrice);
 83          const unitPrice =
 84            normalizeText(
 85              priceArea.querySelector('.unit-price, [class*="unit-price"], [class*="unitPrice"]')?.textContent || ''
 86            ) ||
 87            priceAreaText.match(/\\([^)]*당\\s*[^)]*원[^)]*\\)/)?.[0] ||
 88            '';
 89  
 90          const candidates = Array.from(priceArea.querySelectorAll('span, strong, div'))
 91            .map((node) => {
 92              const text = normalizeText(node.textContent || '');
 93              if (!text || !/\\d/.test(text)) return null;
 94              if (/\\d{1,2}:\\d{2}:\\d{2}/.test(text)) return null;
 95              if (/당\\s*\\d/.test(text)) return null;
 96              if (/^\\d+%$/.test(text)) return null;
 97  
 98              const num = parseNum(text);
 99              if (num == null) return null;
100  
101              const className = normalizeText(node.getAttribute('class') || '').toLowerCase();
102              let score = 0;
103              if (/price|sale|selling|final/.test(className)) score += 6;
104              if (/red/.test(className)) score += 5;
105              if (/font-bold|bold/.test(className)) score += 3;
106              if (/line-through/.test(className)) score -= 12;
107              if (text.includes('원')) score += 2;
108              if (originalPriceNum != null && num === originalPriceNum) score -= 10;
109              if (num < 100) score -= 10;
110  
111              return { text, num, score };
112            })
113            .filter(Boolean)
114            .sort((a, b) => {
115              if (b.score !== a.score) return b.score - a.score;
116              if (originalPriceNum != null) {
117                const aPrefer = a.num !== originalPriceNum ? 1 : 0;
118                const bPrefer = b.num !== originalPriceNum ? 1 : 0;
119                if (bPrefer !== aPrefer) return bPrefer - aPrefer;
120              }
121              return b.num - a.num;
122            });
123  
124          const currentPrice =
125            normalizeText(candidates.find((candidate) => candidate.num !== originalPriceNum)?.text || '') ||
126            normalizeText(candidates[0]?.text || '') ||
127            extractPriceFromText(priceAreaText) ||
128            '';
129  
130          return {
131            price: currentPrice,
132            originalPrice,
133            unitPrice,
134          };
135        };
136        const canonicalUrl = (url, productId) => {
137          if (url) {
138            try {
139              const parsed = new URL(url, 'https://www.coupang.com');
140              const match = parsed.pathname.match(/\\/vp\\/products\\/(\\d+)/);
141              return 'https://www.coupang.com/vp/products/' + (match?.[1] || productId || '');
142            } catch {}
143          }
144          return productId ? 'https://www.coupang.com/vp/products/' + productId : '';
145        };
146          const normalize = (raw) => {
147            const rawText = normalizeText(raw.text || raw.badgeText || raw.deliveryText || raw.summary);
148          const productId = normalizeText(
149            raw.productId || raw.product_id || raw.id || raw.productNo ||
150            raw?.product?.productId || raw?.item?.id
151          ).match(/(\\d{6,})/)?.[1] || '';
152          const title = normalizeText(
153            raw.title || raw.name || raw.productName || raw.productTitle || raw.itemName
154          );
155          const price = parseNum(raw.price || raw.salePrice || raw.finalPrice || raw.sellingPrice);
156          const originalPrice = parseNum(raw.originalPrice || raw.basePrice || raw.listPrice || raw.originPrice);
157          const unitPrice = normalizeText(raw.unitPrice || raw.unit_price || raw.unitPriceText);
158          const rating = parseNum(raw.rating || raw.star || raw.reviewRating);
159          const reviewCount = parseNum(raw.reviewCount || raw.ratingCount || raw.reviewCnt || raw.reviews);
160          const badge = Array.isArray(raw.badges) ? raw.badges.map(normalizeText).filter(Boolean).join(', ') : normalizeText(raw.badge || raw.labels);
161          const seller = normalizeText(raw.seller || raw.sellerName || raw.vendorName || raw.merchantName);
162          const category = normalizeText(raw.category || raw.categoryName || raw.categoryPath);
163          const discountRate = parseNum(raw.discountRate || raw.discount || raw.discountPercent);
164          const url = canonicalUrl(raw.url || raw.productUrl || raw.link, productId);
165          return {
166            productId,
167            title,
168            price,
169            originalPrice,
170            unitPrice,
171            discountRate,
172            rating,
173            reviewCount,
174            rocket: normalizeText(raw.rocket || raw.rocketType),
175            deliveryType: normalizeText(raw.deliveryType || raw.deliveryBadge || raw.shippingType || raw.shippingBadge),
176            deliveryPromise: normalizeText(raw.deliveryPromise || raw.promise || raw.arrivalText || raw.arrivalBadge),
177            seller,
178            badge,
179            category,
180            url,
181          };
182        };
183  
184        const byApi = async () => {
185          const candidates = [
186            '/np/search?q=' + encodeURIComponent(query) + '&component=&channel=user&page=' + pageNumber,
187            '/np/search?component=&q=' + encodeURIComponent(query) + '&channel=user&page=' + pageNumber,
188          ];
189  
190          for (const path of candidates) {
191            try {
192              const resp = await fetch(path, { credentials: 'include' });
193              if (!resp.ok) continue;
194              const text = await resp.text();
195              const data = text.trim().startsWith('<') ? null : JSON.parse(text);
196              const maybeItems =
197                data?.data?.products ||
198                data?.data?.productList ||
199                data?.products ||
200                data?.productList ||
201                data?.items;
202              if (Array.isArray(maybeItems) && maybeItems.length) {
203                return maybeItems.slice(0, limit).map(normalize);
204              }
205            } catch {}
206          }
207          return [];
208        };
209  
210        const byBootstrap = () => {
211          const isProductLike = (item) => {
212            if (!item || typeof item !== 'object') return false;
213            const values = [item.productId, item.product_id, item.id, item.productNo, item.url, item.productUrl, item.link, item.title, item.productName];
214            return values.some((value) => /\\/vp\\/products\\/|\\d{6,}/.test(normalizeText(value)));
215          };
216  
217          const collectProducts = (node) => {
218            const queue = [node];
219            while (queue.length) {
220              const current = queue.shift();
221              if (!current || typeof current !== 'object') continue;
222              if (Array.isArray(current)) {
223                const productish = current.filter(isProductLike);
224                if (productish.length >= 3) return productish.slice(0, limit).map(normalize);
225                queue.push(...current.slice(0, 50));
226                continue;
227              }
228              for (const value of Object.values(current)) queue.push(value);
229            }
230            return [];
231          };
232  
233          const scriptNodes = Array.from(document.scripts);
234          for (const script of scriptNodes) {
235            const text = script.textContent || '';
236            if (!text || !/product|search/i.test(text)) continue;
237            const arrayMatches = [
238              ...text.matchAll(/"products?"\\s*:\\s*(\\[[\\s\\S]{100,}?\\])/g),
239              ...text.matchAll(/"itemList"\\s*:\\s*(\\[[\\s\\S]{100,}?\\])/g),
240            ];
241            for (const match of arrayMatches) {
242              try {
243                const products = JSON.parse(match[1]);
244                if (Array.isArray(products) && products.length) {
245                  return products.slice(0, limit).map(normalize);
246                }
247              } catch {}
248            }
249          }
250  
251          const globals = [
252            window.__NEXT_DATA__,
253            window.__APOLLO_STATE__,
254            window.__INITIAL_STATE__,
255            window.__STATE__,
256            window.__PRELOADED_STATE__,
257          ];
258          for (const candidate of globals) {
259            if (!candidate || typeof candidate !== 'object') continue;
260            const found = collectProducts(candidate);
261            if (found.length) return found;
262          }
263          return [];
264        };
265  
266        const byJsonLd = () => {
267          const scripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
268          for (const script of scripts) {
269            const text = script.textContent || '';
270            if (!text) continue;
271            try {
272              const payload = JSON.parse(text);
273              const docs = Array.isArray(payload) ? payload : [payload];
274              for (const doc of docs) {
275                const items =
276                  doc?.itemListElement ||
277                  doc?.about?.itemListElement ||
278                  doc?.mainEntity?.itemListElement ||
279                  [];
280                if (!Array.isArray(items) || !items.length) continue;
281                const mapped = items.map((entry) => {
282                  const item = entry?.item || entry;
283                  return normalize({
284                    productId: item?.url || item?.sku || item?.productID,
285                    title: item?.name,
286                    price: item?.offers?.price,
287                    originalPrice: item?.offers?.highPrice,
288                    rating: item?.aggregateRating?.ratingValue,
289                    reviewCount: item?.aggregateRating?.reviewCount,
290                    seller: item?.offers?.seller?.name,
291                    badge: item?.offers?.availability,
292                    category: item?.category,
293                    url: item?.url,
294                  });
295                }).filter((item) => item.productId || item.url || item.title);
296                if (mapped.length) return mapped.slice(0, limit);
297              }
298            } catch {}
299          }
300          return [];
301        };
302  
303        const byDom = () => {
304          const domScanLimit = Math.max(limit * 6, 60);
305          const cards = Array.from(new Set([
306            ...document.querySelectorAll('li.search-product'),
307            ...document.querySelectorAll('li[class*="search-product"], div[class*="search-product"], article[class*="search-product"]'),
308            ...document.querySelectorAll('li[class*="ProductUnit_productUnit"], [class*="ProductUnit_productUnit"]'),
309            ...document.querySelectorAll('.impression-logged, [class*="promotion-item"], [class*="product-item"]'),
310            ...document.querySelectorAll('[data-product-id]'),
311            ...document.querySelectorAll('[data-id]'),
312            ...document.querySelectorAll('a[href*="/vp/products/"]'),
313          ])).slice(0, domScanLimit);
314          const items = [];
315          for (const el of cards) {
316            const root = el.closest('li, div, article, section') || el;
317            const html = root.innerHTML || '';
318            const priceInfo = extractPriceInfo(root);
319            const badgeImages = Array.from(root.querySelectorAll('img[data-badge-id]'));
320            const badgeIds = badgeImages
321              .map((node) => node.getAttribute('data-badge-id') || '')
322              .filter(Boolean);
323            const badgeSrcText = badgeImages
324              .map((node) => (node.getAttribute('data-badge-id') || '') + ' ' + (node.getAttribute('src') || ''))
325              .join(' ');
326            const productId =
327              root.getAttribute('data-product-id') ||
328              el.getAttribute('data-product-id') ||
329              root.querySelector('a[href*="/vp/products/"]')?.getAttribute('data-product-id') ||
330              root.querySelector('a[href*="/vp/products/"]')?.getAttribute('href')?.match(/\\/vp\\/products\\/(\\d+)/)?.[1] ||
331              html.match(/\\/vp\\/products\\/(\\d+)/)?.[1] ||
332              (el.getAttribute('href') || '').match(/\\/vp\\/products\\/(\\d+)/)?.[1] ||
333              '';
334            const title =
335              root.querySelector('.name, .title, .product-name, .search-product-title, .item-title, .ProductUnit_productNameV2__cV9cw, [class*="ProductUnit_productName"], [class*="productName"], [class*="product-name"], [class*="title"]')?.textContent ||
336              root.querySelector('img[alt]')?.getAttribute('alt') ||
337              html.match(/alt="([^"]+)"/)?.[1] ||
338              (root.textContent || '').replace(/\\s+/g, ' ').trim().match(/^(.+?)(\\d{1,3},\\d{3}원|무료배송|내일\\(|오늘\\(|새벽)/)?.[1] ||
339              el.getAttribute('title') ||
340              '';
341            const price = priceInfo.price || '';
342            const originalPrice = priceInfo.originalPrice || '';
343            const unitPrice = priceInfo.unitPrice || '';
344            const rating =
345              root.querySelector('.rating, .star em, [class*="rating"], [class*="star"], [class*="ProductRating"] [aria-label], [aria-label][class*="ProductRating"]')?.getAttribute?.('aria-label') ||
346              root.querySelector('.rating, .star em, [class*="rating"], [class*="star"], [class*="ProductRating"]')?.textContent ||
347              '';
348            const reviewCount =
349              root.querySelector('.rating-total-count, .count, .review-count, .promotion-item-review-count, [class*="review"], [class*="count"], [class*="ProductRating"] span, [class*="ProductRating"] [class*="fw-text"]')?.textContent ||
350              '';
351            const seller =
352              root.querySelector('.seller, .vendor, .search-product-wrap .vendor-name, [class*="vendor"], [class*="seller"]')?.textContent ||
353              '';
354            const category =
355              root.getAttribute('data-category') ||
356              root.querySelector('[class*="category"]')?.textContent ||
357              '';
358            const text = (root.textContent || '').replace(/\\s+/g, ' ').trim();
359            const badgeNodes = Array.from(root.querySelectorAll('.badge, .delivery, .tag, .icon-service, .pdd-text, .delivery-text, [class*="badge"], [class*="delivery"]'));
360            const hrefNode = root.querySelector('a[href*="/vp/products/"]');
361            items.push(normalize({
362              productId,
363              title,
364              price,
365              originalPrice,
366              unitPrice,
367              rating,
368              reviewCount,
369              seller,
370              badges: [...badgeIds, ...badgeNodes.map((node) => node.textContent || '').filter(Boolean)],
371              rocket: badgeSrcText + ' ' + badgeNodes.map((node) => node.textContent || '').join(' '),
372              deliveryType: badgeNodes.map((node) => node.textContent || '').join(' ') + ' ' + text,
373              deliveryPromise: badgeNodes.map((node) => node.textContent || '').join(' ') + ' ' + text,
374              category,
375              text,
376              url: hrefNode?.getAttribute('href') || '',
377            }));
378          }
379          return items.slice(0, domScanLimit);
380        };
381  
382        let items = await byApi();
383        if (!items.length) items = byJsonLd();
384        if (!items.length) items = byBootstrap();
385        const domItems = byDom();
386        if (!items.length) items = domItems;
387  
388        return {
389          loginHints: {
390            hasLoginLink: Boolean(document.querySelector('a[href*="login"], a[title*="로그인"]')),
391            hasMyCoupang: /마이쿠팡/.test(document.body.innerText),
392          },
393          items,
394          domItems,
395        };
396      })()
397    `;
398  }
/**
 * Coerces a raw CLI argument into an integer clamped to [min, max].
 *
 * Falsy input (missing, 0, '') and input that does not parse as a number both
 * resolve to `fallback`; fractional input is truncated. This keeps NaN out of
 * the search URL and out of the script injected into the page, because
 * Math.max/Math.min propagate NaN instead of clamping it.
 *
 * @param {*} value - Raw argument value.
 * @param {number} fallback - Value used when the argument is missing or not numeric.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @returns {number} Integer within [min, max], or `fallback`.
 */
function clampIntArg(value, fallback, min, max) {
    const parsed = Math.trunc(Number(value || fallback));
    if (Number.isNaN(parsed)) {
        return fallback;
    }
    return Math.min(Math.max(parsed, min), max);
}
// Registers the `coupang search` command. It drives the browser page to the
// search results, optionally applies a filter by clicking its label, and
// returns the normalized result rows.
cli({
    site: 'coupang',
    name: 'search',
    description: 'Search Coupang products with logged-in browser session',
    domain: 'www.coupang.com',
    strategy: Strategy.COOKIE,
    browser: true,
    args: [
        { name: 'query', required: true, positional: true, help: 'Search keyword' },
        { name: 'page', type: 'int', default: 1, help: 'Search result page number' },
        { name: 'limit', type: 'int', default: 20, help: 'Max results (max 50)' },
        { name: 'filter', required: false, help: 'Optional search filter (currently supports: rocket)' },
    ],
    columns: ['rank', 'title', 'price', 'unit_price', 'rating', 'review_count', 'rocket', 'delivery_type', 'delivery_promise', 'url'],
    func: async (page, kwargs) => {
        const query = String(kwargs.query || '').trim();
        if (!query) {
            throw new Error('Query is required');
        }
        const pageNumber = clampIntArg(kwargs.page, 1, 1, Number.MAX_SAFE_INTEGER);
        const limit = clampIntArg(kwargs.limit, 20, 1, 50);
        const filter = String(kwargs.filter || '').trim().toLowerCase();
        // With a filter, page 1 is loaded first so the filter label can be clicked;
        // the requested page is then opened from the filtered URL.
        const initialPage = filter ? 1 : pageNumber;
        const url = `https://www.coupang.com/np/search?q=${encodeURIComponent(query)}&channel=user&page=${initialPage}`;
        await page.goto(url);
        if (filter) {
            const filterResult = await page.evaluate(buildApplyFilterEvaluate(filter));
            if (!filterResult?.ok) {
                throw new Error(`Unsupported or unavailable filter: ${filter}`);
            }
            await page.wait(3);
            if (pageNumber > 1) {
                const locationInfo = await page.evaluate(buildCurrentLocationEvaluate());
                const filteredUrl = new URL(locationInfo?.href || url);
                filteredUrl.searchParams.set('page', String(pageNumber));
                await page.goto(filteredUrl.toString());
            }
        }
        await page.autoScroll({ times: filter ? 3 : 2, delayMs: 1500 });
        const raw = await page.evaluate(buildSearchEvaluate(query, limit, pageNumber));
        const loginHints = raw?.loginHints ?? {};
        const items = Array.isArray(raw?.items) ? raw.items : [];
        const domItems = Array.isArray(raw?.domItems) ? raw.domItems : [];
        const normalizedBase = sanitizeSearchItems(items.map((item, index) => normalizeSearchItem(item, index)), limit);
        const normalizedDom = sanitizeSearchItems(domItems.map((item, index) => normalizeSearchItem(item, index)), Math.max(limit * 6, 60));
        // Filtered searches use only the DOM rows; unfiltered searches merge both sets.
        const normalized = filter
            ? sanitizeSearchItems(normalizedDom, limit)
            : mergeSearchItems(normalizedBase, normalizedDom, limit);
        // No rows plus a visible login link (and no "My Coupang" text) is reported as a login problem.
        if (!normalized.length && loginHints.hasLoginLink && !loginHints.hasMyCoupang) {
            throw new Error('Coupang login required. Please log into Coupang in Chrome and retry.');
        }
        return normalized;
    },
});