/ clis / 1688 / shared.js
shared.js
  1  import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
  2  export const SITE = '1688';
  3  export const HOME_URL = 'https://www.1688.com/';
  4  export const SEARCH_URL_PREFIX = 'https://s.1688.com/selloffer/offer_search.htm?charset=utf8&keywords=';
  5  export const DETAIL_URL_PREFIX = 'https://detail.1688.com/offer/';
  6  export const STORE_MOBILE_URL_PREFIX = 'https://winport.m.1688.com/page/index.html?memberId=';
  7  export const STRATEGY = 'cookie';
  8  export const SEARCH_LIMIT_DEFAULT = 20;
  9  export const SEARCH_LIMIT_MAX = 100;
 10  const STORE_GENERIC_HOSTS = new Set(['www', 'detail', 's', 'winport', 'work', 'air', 'dj']);
 11  const TRACKING_QUERY_KEYS = new Set([
 12      'spm',
 13      'tracelog',
 14      'clickid',
 15      'source',
 16      'scene',
 17      'from',
 18      'src',
 19      'ns',
 20      'cna',
 21      'pvid',
 22  ]);
 23  const CAPTCHA_URL_MARKER = '/_____tmd_____/punish';
 24  const CAPTCHA_TEXT_PATTERNS = [
 25      '请拖动下方滑块完成验证',
 26      '请按住滑块,拖动到最右边',
 27      '通过验证以确保正常访问',
 28      '验证码拦截',
 29      '访问验证',
 30      '滑动验证',
 31  ];
 32  const LOGIN_TEXT_PATTERNS = [
 33      '请登录',
 34      '登录后',
 35      '账号登录',
 36      '手机登录',
 37      '立即登录',
 38      '扫码登录',
 39      '请先完成登录',
 40      '请先登录后查看',
 41  ];
 42  const LOGIN_URL_PATTERNS = ['/member/login', 'passport', 'login.taobao.com', 'account.1688.com'];
 43  export const FACTORY_BADGE_PATTERNS = [
 44      '源头工厂',
 45      '深度验厂',
 46      '实力工厂',
 47      '工厂档案',
 48      '加工专区',
 49      '验厂报告',
 50      '厂家直销',
 51      '生产厂家',
 52      '工厂直供',
 53  ];
 54  export const SERVICE_BADGE_PATTERNS = [
 55      '延期必赔',
 56      '品质保障',
 57      '破损包赔',
 58      '退货包运费',
 59      '晚发必赔',
 60      '7*24小时响应',
 61      '48小时发货',
 62      '72小时发货',
 63      '后天达',
 64      '包邮',
 65      '闪电拿样',
 66  ];
 67  const CHINA_LOCATIONS = [
 68      '北京',
 69      '天津',
 70      '上海',
 71      '重庆',
 72      '河北',
 73      '山西',
 74      '辽宁',
 75      '吉林',
 76      '黑龙江',
 77      '江苏',
 78      '浙江',
 79      '安徽',
 80      '福建',
 81      '江西',
 82      '山东',
 83      '河南',
 84      '湖北',
 85      '湖南',
 86      '广东',
 87      '海南',
 88      '四川',
 89      '贵州',
 90      '云南',
 91      '陕西',
 92      '甘肃',
 93      '青海',
 94      '台湾',
 95      '内蒙古',
 96      '广西',
 97      '西藏',
 98      '宁夏',
 99      '新疆',
100      '香港',
101      '澳门',
102  ];
/**
 * Flatten a value to a single trimmed line of text.
 *
 * @param {unknown} value - Anything; only strings are processed.
 * @returns {string} The string with non-breaking spaces replaced, every
 *   whitespace run collapsed to one space and the ends trimmed; `''` for
 *   any non-string input.
 */
export function cleanText(value) {
    if (typeof value !== 'string') {
        return '';
    }
    const withPlainSpaces = value.replace(/\u00a0/g, ' ');
    return withPlainSpaces.replace(/\s+/g, ' ').trim();
}
/**
 * Normalize text line by line while keeping the line structure.
 *
 * @param {unknown} value - Anything; only strings are processed.
 * @returns {string} Lines split on `\n`, each with whitespace runs collapsed
 *   and trimmed, empty lines removed, re-joined with `\n`; `''` for any
 *   non-string input.
 */
export function cleanMultilineText(value) {
    if (typeof value !== 'string') {
        return '';
    }
    const keptLines = [];
    for (const rawLine of value.replace(/\u00a0/g, ' ').split('\n')) {
        const line = rawLine.replace(/\s+/g, ' ').trim();
        if (line) {
            keptLines.push(line);
        }
    }
    return keptLines.join('\n');
}
/**
 * Clean every entry with `cleanText`, drop the ones that end up empty and
 * remove duplicates, keeping first-seen order.
 *
 * @param {Array<unknown>} values - Raw entries.
 * @returns {string[]} Distinct, non-empty cleaned strings.
 */
export function uniqueNonEmpty(values) {
    const cleaned = values.map((entry) => cleanText(entry));
    const nonEmpty = cleaned.filter((entry) => entry !== '');
    return Array.from(new Set(nonEmpty));
}
/**
 * Parse the search `--limit` option.
 *
 * @param {unknown} input - Raw option value; `null`/`undefined` selects
 *   `SEARCH_LIMIT_DEFAULT`.
 * @returns {number} The parsed integer, capped at `SEARCH_LIMIT_MAX`.
 * @throws {ArgumentError} When the value does not parse to an integer >= 1.
 */
export function parseSearchLimit(input) {
    const raw = input ?? SEARCH_LIMIT_DEFAULT;
    const requested = Number.parseInt(String(raw), 10);
    const isPositive = Number.isFinite(requested) && requested >= 1;
    if (!isPositive) {
        throw new ArgumentError('1688 search --limit must be a positive integer', 'Example: opencli 1688 search "桌面置物架" --limit 20');
    }
    return requested > SEARCH_LIMIT_MAX ? SEARCH_LIMIT_MAX : requested;
}
/**
 * Build the search results URL for a keyword query.
 *
 * @param {unknown} query - Search keywords.
 * @returns {string} `SEARCH_URL_PREFIX` followed by the URI-encoded, cleaned query.
 * @throws {ArgumentError} When the query is empty after cleaning.
 */
export function buildSearchUrl(query) {
    const keywords = cleanText(query);
    if (keywords === '') {
        throw new ArgumentError('1688 search query cannot be empty', 'Example: opencli 1688 search "桌面置物架" --limit 20');
    }
    return SEARCH_URL_PREFIX + encodeURIComponent(keywords);
}
/**
 * Build the canonical offer detail URL from an offer ID or offer URL.
 *
 * @param {unknown} input - Offer ID or a URL containing one.
 * @returns {string} `DETAIL_URL_PREFIX` + offer ID + `.html`.
 * @throws {ArgumentError} When no offer ID can be extracted.
 */
export function buildDetailUrl(input) {
    const offerId = extractOfferId(input);
    if (offerId) {
        return [DETAIL_URL_PREFIX, offerId, '.html'].join('');
    }
    throw new ArgumentError('1688 item expects an offer URL or offer ID', 'Example: opencli 1688 item 887904326744');
}
/**
 * Turn user input (member ID, store URL, store host or bare shop name) into
 * a store URL.
 *
 * Resolution order: a `b2b-…` member ID anywhere in the input wins and maps
 * to the mobile store URL; otherwise an http(s) URL, a `*.1688.com` host or
 * a bare `[a-z0-9-]+` name is canonicalized via `canonicalizeStoreUrl`.
 *
 * @param {unknown} input - Raw store argument.
 * @returns {string} Store URL.
 * @throws {ArgumentError} When the input is empty or matches none of the
 *   accepted shapes (errors from `canonicalizeStoreUrl` propagate too).
 */
export function resolveStoreUrl(input) {
    const target = cleanText(input);
    if (!target) {
        throw new ArgumentError('1688 store expects a store URL or member ID', 'Example: opencli 1688 store https://yinuoweierfushi.1688.com/');
    }

    const memberId = extractMemberId(target);
    if (memberId) {
        return STORE_MOBILE_URL_PREFIX + memberId;
    }

    let candidateUrl = null;
    if (/^https?:\/\//i.test(target)) {
        candidateUrl = target;
    } else if (target.endsWith('.1688.com')) {
        candidateUrl = `https://${target}`;
    } else if (/^[a-z0-9-]+$/i.test(target)) {
        candidateUrl = `https://${target}.1688.com`;
    }

    if (candidateUrl === null) {
        throw new ArgumentError('1688 store expects a store URL or member ID', 'Example: opencli 1688 store b2b-22154705262941f196');
    }
    return canonicalizeStoreUrl(candidateUrl);
}
/**
 * Canonicalize a 1688 store URL.
 *
 * @param {unknown} input - URL under 1688.com.
 * @returns {string} The mobile store URL when the URL carries a member ID,
 *   otherwise `https://<store host>`.
 * @throws {ArgumentError} When the URL is not a valid 1688 URL or its host
 *   is not a storefront host.
 */
export function canonicalizeStoreUrl(input) {
    const parsed = parse1688Url(input);

    const memberId = extractMemberId(parsed.toString());
    if (memberId) {
        return STORE_MOBILE_URL_PREFIX + memberId;
    }

    const storeHost = normalizeStoreHost(parsed.hostname);
    if (storeHost) {
        return `https://${storeHost}`;
    }
    throw new ArgumentError('Invalid 1688 store URL', 'Example: opencli 1688 store https://yinuoweierfushi.1688.com/');
}
/**
 * Canonicalize an item reference without throwing.
 *
 * @param {unknown} input - Offer ID or URL.
 * @returns {string|null} The detail URL when an offer ID is found; otherwise
 *   the 1688 URL with tracking parameters and fragment removed; `null` when
 *   the input is not a valid 1688 URL.
 */
export function canonicalizeItemUrl(input) {
    const offerId = extractOfferId(input);
    if (offerId) {
        return [DETAIL_URL_PREFIX, offerId, '.html'].join('');
    }

    const parsed = parse1688UrlOrNull(input);
    if (!parsed) {
        return null;
    }
    stripTrackingParams(parsed);
    parsed.hash = '';
    return parsed.toString();
}
/**
 * Canonicalize a seller reference without throwing.
 *
 * @param {unknown} input - Member ID or seller/store URL.
 * @returns {string|null} The mobile store URL when a member ID is found,
 *   `https://<store host>` for a storefront URL, otherwise `null`.
 */
export function canonicalizeSellerUrl(input) {
    const memberId = extractMemberId(input);
    if (memberId) {
        return STORE_MOBILE_URL_PREFIX + memberId;
    }

    const parsed = parse1688UrlOrNull(input);
    const storeHost = parsed ? normalizeStoreHost(parsed.hostname) : null;
    return storeHost ? `https://${storeHost}` : null;
}
/**
 * Pull an offer ID (six or more digits) out of user input.
 *
 * Accepted shapes, in order: the whole input is the ID, a `/offer/<id>.html`
 * path, or an `offerId=<id>` query parameter.
 *
 * @param {unknown} input - Offer ID or URL.
 * @returns {string|null} The ID, or `null` when none is found.
 */
export function extractOfferId(input) {
    const candidate = cleanText(input);
    if (!candidate) {
        return null;
    }
    if (/^\d{6,}$/.test(candidate)) {
        return candidate;
    }
    const embeddedPatterns = [
        /\/offer\/(\d{6,})\.html/i,
        /[?&]offerId=(\d{6,})/i,
    ];
    for (const pattern of embeddedPatterns) {
        const found = pattern.exec(candidate);
        if (found) {
            return found[1];
        }
    }
    return null;
}
/**
 * Pull a `b2b-…` member ID out of user input.
 *
 * Tries, in order: a word-bounded `b2b-<alnum>` token anywhere in the text,
 * a `memberId=` query parameter, and a `/winport/<id>.html` path.
 *
 * @param {unknown} input - Member ID or URL.
 * @returns {string|null} The member ID as written in the input, or `null`.
 */
export function extractMemberId(input) {
    const candidate = cleanText(input);
    if (!candidate) {
        return null;
    }

    const bareToken = /\bb2b-[a-z0-9]+\b/i.exec(candidate);
    if (bareToken) {
        return bareToken[0];
    }

    const embeddedPatterns = [
        /[?&]memberId=(b2b-[a-z0-9]+)/i,
        /\/winport\/(b2b-[a-z0-9]+)\.html/i,
    ];
    for (const pattern of embeddedPatterns) {
        const found = pattern.exec(candidate);
        if (found) {
            return found[1];
        }
    }
    return null;
}
/**
 * Derive the shop ID (first host label) from a store URL or host.
 *
 * @param {unknown} input - Store URL, host, or shop name; `https://` is
 *   prepended when no http(s) scheme is present.
 * @returns {string|null} The first label of a storefront host, or `null`
 *   when the host is not a storefront under 1688.com. When the input cannot
 *   be parsed as a URL, the input itself is returned if it is `[a-z0-9-]+`.
 */
export function extractShopId(input) {
    const candidate = cleanText(input);
    if (!candidate) {
        return null;
    }
    const hasScheme = /^https?:\/\//i.test(candidate);
    try {
        const parsed = new URL(hasScheme ? candidate : `https://${candidate}`);
        const storeHost = normalizeStoreHost(parsed.hostname);
        if (storeHost) {
            return storeHost.split('.')[0] ?? null;
        }
        return null;
    } catch {
        // NOTE(review): a bare `[a-z0-9-]+` name presumably always parses as
        // `https://<name>`, so this fallback may never yield a value — confirm.
        return /^[a-z0-9-]+$/i.test(candidate) ? candidate : null;
    }
}
/**
 * Build the provenance record attached to fetched data.
 *
 * @param {string} sourceUrl - URL the data came from (stored as given).
 * @returns {{source_url: string, fetched_at: string, strategy: string}}
 *   Record with the current time as an ISO-8601 string and `STRATEGY`.
 */
export function buildProvenance(sourceUrl) {
    const fetchedAt = new Date().toISOString();
    return { source_url: sourceUrl, fetched_at: fetchedAt, strategy: STRATEGY };
}
/**
 * Parse a price label such as a single price or a price range.
 *
 * @param {unknown} text - Raw price text.
 * @returns {{price_text: string, price_min: number|null, price_max: number|null, currency: string|null}}
 *   `price_min` / `price_max` are the first and last numbers found in the
 *   normalized text (not sorted); `currency` is `'CNY'` when a number was
 *   found and the text contains `¥` or `元`, otherwise `null`.
 */
export function parsePriceText(text) {
    const priceText = normalizeNumericText(cleanText(text));

    const amounts = [];
    for (const token of priceText.match(/\d+(?:,\d{3})*(?:\.\d+)?/g) ?? []) {
        const amount = Number.parseFloat(token.replace(/,/g, ''));
        if (Number.isFinite(amount)) {
            amounts.push(amount);
        }
    }

    const hasAmounts = amounts.length > 0;
    const isYuan = priceText.includes('¥') || priceText.includes('元');
    return {
        price_text: priceText,
        price_min: hasAmounts ? amounts[0] : null,
        price_max: hasAmounts ? amounts[amounts.length - 1] : null,
        currency: hasAmounts && isYuan ? 'CNY' : null,
    };
}
/**
 * Normalize raw tiered-price entries.
 *
 * Each raw tier is read through its `beginAmount` and `price` properties.
 * Tiers without any price text are dropped.
 *
 * @param {Array<{beginAmount?: unknown, price?: unknown}>} rawTiers - Raw
 *   tiers; anything that is not an array yields `[]`.
 * @param {string} [unit] - Unit suffix appended to the quantity text.
 * @returns {Array<{quantity_text: string, quantity_min: number|null, price_text: string, price: number|null, currency: string|null}>}
 */
export function normalizePriceTiers(rawTiers, unit) {
    if (!Array.isArray(rawTiers)) {
        return [];
    }
    return rawTiers
        .map((tier) => {
            const rawPrice = tier?.price;
            const quantityMin = toNumber(tier?.beginAmount);
            const price = toNumber(rawPrice);
            // cleanText() returns '' for non-strings, which used to discard
            // tiers whose price arrived as a number even though toNumber()
            // accepts it; render finite numeric prices as text instead.
            const priceText = typeof rawPrice === 'number'
                ? (price !== null ? String(price) : '')
                : cleanText(rawPrice);
            return {
                quantity_text: quantityMin !== null ? `${quantityMin}${unit ?? ''}` : '',
                quantity_min: quantityMin,
                price_text: priceText,
                price,
                currency: priceText ? 'CNY' : null,
            };
        })
        .filter((tier) => tier.price_text);
}
/**
 * Parse minimum-order-quantity text.
 *
 * Patterns are tried in order: "<n><unit?> 起批", "≥ <n>", then a quantity
 * range "<n>~<m><unit>" (whose lower bound is used).
 *
 * @param {unknown} text - Raw MOQ text.
 * @returns {{moq_text: string, moq_value: number|null}} Normalized text and
 *   the parsed quantity, or `null` when no pattern matches.
 */
export function parseMoqText(text) {
    const moqText = normalizeNumericText(cleanText(text));
    const found = moqText.match(/(\d+(?:\.\d+)?)\s*(件|个|套|箱|包|双|台|把|只|pcs|piece|pieces)?\s*起批/i)
        ?? moqText.match(/≥\s*(\d+(?:\.\d+)?)/)
        ?? moqText.match(/(\d+(?:\.\d+)?)\s*(?:~|-|至|到)\s*\d+(?:\.\d+)?\s*(件|个|套|箱|包|双|台|把|只|pcs|piece|pieces)/i);
    return {
        moq_text: moqText,
        moq_value: found ? Number.parseFloat(found[1]) : null,
    };
}
/**
 * Extract the seller/shipping origin from free text.
 *
 * Only the text before the first "送至" / "发往" marker is considered. A
 * short line (at most 16 characters) starting with a known region name is
 * returned whole; otherwise the first region name followed by up to eight
 * CJK characters is returned.
 *
 * @param {unknown} text - Raw page text.
 * @returns {string|null} Location text, or `null` when nothing matches.
 */
export function extractLocation(text) {
    const normalized = cleanMultilineText(text);
    const origin = normalized.split(/送至|发往/)[0] ?? normalized;

    for (const rawLine of origin.split('\n')) {
        const line = cleanText(rawLine);
        const isShortLine = line !== '' && line.length <= 16;
        if (isShortLine && CHINA_LOCATIONS.some((region) => line.startsWith(region))) {
            return line;
        }
    }

    const regionPattern = new RegExp(`(${CHINA_LOCATIONS.join('|')})[\\u4e00-\\u9fa5]{0,8}`);
    const embedded = origin.match(regionPattern);
    return embedded ? embedded[0] : null;
}
/**
 * Extract an address from free text.
 *
 * Prefers a labelled line ("地址" followed by an ASCII or fullwidth colon);
 * otherwise returns the first line containing 省, 市, 区 or 县.
 *
 * @param {unknown} text - Raw page text.
 * @returns {string|null} Address text, or `null` when nothing matches.
 */
export function extractAddress(text) {
    const normalized = cleanMultilineText(text);
    // Accept both ':' and the fullwidth colon U+FF1A; with only the ASCII
    // form, "地址:…" fell through to the fallback below and was returned
    // with its label still attached.
    const labelled = normalized.match(/地址[:\uFF1A]\s*([^\n]+)/);
    if (labelled) {
        return cleanText(labelled[1]);
    }
    const divisionMarkers = ['省', '市', '区', '县'];
    return normalized
        .split('\n')
        .map((line) => cleanText(line))
        .find((line) => divisionMarkers.some((marker) => line.includes(marker)))
        ?? null;
}
/**
 * Extract the value that follows a label at the start of a line.
 *
 * The label may be followed by an optional ASCII or fullwidth colon; the
 * value is the rest of that line, or the next line when the label stands
 * alone.
 *
 * @param {unknown} text - Raw page text.
 * @param {string} label - Literal label text (regex-escaped internally).
 * @returns {string|null} Cleaned value, or `null` when the label is absent.
 */
export function extractMetric(text, label) {
    const normalized = cleanMultilineText(text);
    const escapedLabel = escapeForRegex(label);
    // U+FF1A is the fullwidth colon; without it the colon itself was
    // captured as the first character of the value.
    const direct = normalized.match(new RegExp(`(?:^|\\n)\\s*${escapedLabel}[:\\uFF1A]?\\s*([^\\n]+)`));
    if (direct) {
        return cleanText(direct[1]);
    }
    // `\s*` above already spans a newline, so this is only a safety net for
    // a label that sits alone on its line.
    const lineBased = normalized.match(new RegExp(`(?:^|\\n)\\s*${escapedLabel}\\n([^\\n]+)`));
    return lineBased ? cleanText(lineBased[1]) : null;
}
/**
 * Find the "入驻<N>年" (years on the platform) phrase in text.
 *
 * @param {unknown} text - Raw page text.
 * @returns {string|null} The matched phrase, or `null` when it is absent or
 *   `text` is not a string.
 */
export function extractYearsOnPlatform(text) {
    // Guard the type: calling .match on null/undefined used to throw, unlike
    // the other extractors in this module, which tolerate non-string input.
    if (typeof text !== 'string') {
        return null;
    }
    return text.match(/入驻\d+年/)?.[0] ?? null;
}
/**
 * Extract the "主营" (main business) value from text.
 *
 * @param {unknown} text - Raw page text.
 * @returns {string|null} The value with one leading colon (ASCII or
 *   fullwidth) removed and trimmed, or `null` when the label is absent.
 */
export function extractMainBusiness(text) {
    const value = extractMetric(text, '主营');
    if (!value) {
        return null;
    }
    // Strip a leftover label separator; U+FF1A is the fullwidth colon that
    // Chinese page copy normally uses.
    return value.replace(/^[:\uFF1A]/, '').trim();
}
/**
 * Report which candidate badge labels occur in the text.
 *
 * @param {unknown} text - Raw page text.
 * @param {string[]} candidates - Badge labels to look for.
 * @returns {string[]} Distinct matching labels, in candidate order.
 */
export function extractBadges(text, candidates) {
    // Normalize once; previously the text was re-normalized for every candidate.
    const haystack = cleanMultilineText(text);
    return uniqueNonEmpty(candidates.filter((candidate) => haystack.includes(candidate)));
}
/**
 * Split the "主营" (main business) value into category names.
 *
 * @param {unknown} text - Raw page text.
 * @returns {string[]} Distinct, non-empty categories; `[]` when no main
 *   business value is found.
 */
export function guessTopCategories(text) {
    const mainBusiness = extractMainBusiness(text);
    if (!mainBusiness) {
        return [];
    }
    // Separators: 、 , fullwidth comma (U+FF0C) / |. The fullwidth comma was
    // missing, so lists written with it stayed a single category.
    // uniqueNonEmpty() trims each part, so no separate trim is needed.
    return uniqueNonEmpty(mainBusiness.split(/[、,\uFF0C/|]/));
}
/**
 * Decide whether a page snapshot shows a captcha / anti-bot page.
 *
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state - Page snapshot.
 * @returns {boolean} `true` when the URL contains `CAPTCHA_URL_MARKER` or the
 *   title/body contains any of `CAPTCHA_TEXT_PATTERNS`.
 */
export function isCaptchaState(state) {
    const pageUrl = cleanText(state.href).toLowerCase();
    if (pageUrl.includes(CAPTCHA_URL_MARKER)) {
        return true;
    }
    const haystacks = [cleanText(state.title), cleanMultilineText(state.body_text)];
    return CAPTCHA_TEXT_PATTERNS.some((phrase) => haystacks.some((haystack) => haystack.includes(phrase)));
}
/**
 * Decide whether a page snapshot shows a login wall.
 *
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state - Page snapshot.
 * @returns {boolean} `true` when the URL contains any of `LOGIN_URL_PATTERNS`
 *   or the title/body contains any of `LOGIN_TEXT_PATTERNS`.
 */
export function isLoginState(state) {
    const pageUrl = cleanText(state.href).toLowerCase();
    if (LOGIN_URL_PATTERNS.some((fragment) => pageUrl.includes(fragment))) {
        return true;
    }
    const haystacks = [cleanText(state.title), cleanMultilineText(state.body_text)];
    return LOGIN_TEXT_PATTERNS.some((phrase) => haystacks.some((haystack) => haystack.includes(phrase)));
}
/**
 * Build the recovery hint shown when a captcha blocks an action.
 *
 * @param {string} action - Name of the page/action, interpolated into the hint.
 * @returns {string} Two sentences joined by a single space.
 */
export function buildCaptchaHint(action) {
    const sliderStep = `Open a clean 1688 ${action} page in the shared Chrome profile and finish any slider challenge first.`;
    const cdpStep = 'If you run opencli via CDP, set OPENCLI_CDP_TARGET=1688.com or a more specific 1688 host before retrying.';
    return `${sliderStep} ${cdpStep}`;
}
/**
 * Snapshot the current page's URL, title and visible text.
 *
 * @param {{evaluate: Function}} page - Browser page handle; `evaluate` is
 *   given a script string and is expected to resolve to its result.
 * @returns {Promise<{href: string, title: string, body_text: string}>}
 *   Cleaned snapshot (body text keeps its line breaks).
 */
export async function readPageState(page) {
    const snapshotScript = `
    (() => ({
      href: window.location.href,
      title: document.title || '',
      body_text: document.body ? document.body.innerText || '' : '',
    }))()
  `;
    const snapshot = await page.evaluate(snapshotScript);
    const href = cleanText(snapshot.href);
    const title = cleanText(snapshot.title);
    const bodyText = cleanMultilineText(snapshot.body_text);
    return { href, title, body_text: bodyText };
}
/**
 * Navigate to a URL, wait briefly, and return a snapshot of the page.
 *
 * @param {object} page - Browser page handle providing `goto`, `wait` and `evaluate`.
 * @param {string} url - Destination URL.
 * @param {number} [settleMs=2500] - Passed to `page.goto` as `settleMs`.
 * @param {string} [action='page'] - Action name used in error messages.
 * @returns {Promise<{href: string, title: string, body_text: string}>} Page snapshot.
 * @throws {CommandExecutionError} When the browser target is lost during
 *   navigation or while reading the page; other errors are rethrown as-is.
 */
export async function gotoAndReadState(page, url, settleMs = 2500, action = 'page') {
    try {
        await page.goto(url, { settleMs });
        await page.wait(1.5); // presumably seconds — confirm against the page API
        // `await` is required: returning the bare promise would let a rejection
        // from the page read escape this try/catch, so lost-target errors raised
        // while evaluating would never be translated below.
        return await readPageState(page);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        const lostTargetMarkers = [
            'Inspected target navigated or closed',
            'Cannot find context with specified id',
            'Target closed',
        ];
        if (lostTargetMarkers.some((marker) => message.includes(marker))) {
            throw new CommandExecutionError(`1688 ${action} navigation lost the current browser target`, `${buildCaptchaHint(action)} If CDP is attached to a stale or blocked tab, open a fresh 1688 tab and point OPENCLI_CDP_TARGET at that tab.`);
        }
        throw error;
    }
}
/**
 * Open the 1688 homepage and verify it is neither a captcha nor a login page.
 *
 * @param {object} page - Browser page handle.
 * @returns {Promise<void>}
 * @throws {AuthRequiredError} When the homepage shows a captcha or login wall.
 */
export async function ensure1688Session(page) {
    const action = 'homepage';
    const homepageState = await gotoAndReadState(page, HOME_URL, 1500, action);
    assertAuthenticatedState(homepageState, action);
}
/**
 * Throw when a page snapshot shows a captcha or a login wall.
 *
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state - Page snapshot.
 * @param {string} action - Action name included in the error hint.
 * @returns {void}
 * @throws {AuthRequiredError} When the page is a captcha or login page.
 */
export function assertAuthenticatedState(state, action) {
    const isBlocked = isCaptchaState(state) || isLoginState(state);
    if (isBlocked) {
        throw new AuthRequiredError('1688.com', `请先在共享 Chrome 完成 1688 登录/验证,再重试(${action})`);
    }
}
/**
 * Alias of `assertAuthenticatedState`: despite the name, it rejects login
 * pages as well as captcha pages.
 *
 * @param {object} state - Page snapshot.
 * @param {string} action - Action name included in the error hint.
 * @returns {void}
 * @throws {AuthRequiredError} When the page is a captcha or login page.
 */
export function assertNotCaptcha(state, action) {
    const check = assertAuthenticatedState;
    check(state, action);
}
/**
 * Coerce a value to a finite number.
 *
 * @param {unknown} value - A number, or a string that may contain thousands
 *   commas.
 * @returns {number|null} The finite number; `null` for non-finite numbers,
 *   blank or unparsable strings, and every other type.
 */
export function toNumber(value) {
    switch (typeof value) {
        case 'number':
            return Number.isFinite(value) ? value : null;
        case 'string': {
            const digits = value.replace(/,/g, '').trim();
            if (digits === '') {
                return null;
            }
            const parsed = Number.parseFloat(digits);
            return Number.isFinite(parsed) ? parsed : null;
        }
        default:
            return null;
    }
}
/**
 * Keep at most `limit` leading entries, always at least one.
 *
 * @param {Array<unknown>} values - Candidate list.
 * @param {number} limit - Requested count; truncated toward zero, and
 *   anything below 1 (or not a number) is treated as 1.
 * @returns {Array<unknown>} A new array with the leading entries.
 */
export function limitCandidates(values, limit) {
    const whole = Math.trunc(limit);
    const count = whole >= 1 ? whole : 1;
    return values.slice(0, count);
}
/**
 * Normalize a media reference (plain URL or CSS `url(...)` value) to an
 * absolute URL string.
 *
 * Unwraps `url(...)`, strips one surrounding quote on each side, decodes
 * literal `\u002F` sequences and `&amp;`, and gives protocol-relative URLs
 * an `https:` scheme.
 *
 * @param {unknown} input - Raw media reference.
 * @returns {string} The serialized URL, or `''` for empty input, `data:` /
 *   `blob:` URLs, and anything that does not parse as an absolute URL.
 */
export function normalizeMediaUrl(input) {
    const raw = cleanText(input);
    if (!raw) {
        return '';
    }

    const unwrapped = raw.replace(/^url\((.*)\)$/i, '$1');
    const unquoted = unwrapped.replace(/^['"]|['"]$/g, '');
    const decoded = unquoted.replace(/\\u002F/g, '/').replace(/&amp;/g, '&').trim();

    const isInline = decoded.startsWith('data:') || decoded.startsWith('blob:');
    if (!decoded || isInline) {
        return '';
    }

    const absolute = decoded.startsWith('//') ? `https:${decoded}` : decoded;
    try {
        return new URL(absolute).toString();
    } catch {
        return '';
    }
}
/**
 * Normalize and de-duplicate media entries.
 *
 * Entries whose URL does not normalize are dropped. Two entries are
 * duplicates when they share the same `type` and normalized URL; the first
 * one wins.
 *
 * @param {Iterable<{type?: unknown, url?: unknown, source?: unknown}>} values - Raw entries.
 * @returns {Array<object>} Copies of the kept entries with `url` normalized
 *   and `source` cleaned (`undefined` when empty).
 */
export function uniqueMediaSources(values) {
    const byKey = new Map();
    for (const entry of values) {
        const url = normalizeMediaUrl(entry.url);
        if (!url) {
            continue;
        }
        const key = `${entry.type}:${url}`;
        if (!byKey.has(key)) {
            byKey.set(key, {
                ...entry,
                url,
                source: cleanText(entry.source) || undefined,
            });
        }
    }
    return [...byKey.values()];
}
/**
 * Tighten spacing inside numeric text so numbers and ranges parse reliably.
 *
 * @param {string} value - Already-cleaned text.
 * @returns {string} Text with no space between a currency sign (¥ $ €) and
 *   the following digit, none around a decimal point between digits, and
 *   none around `~` or `-`; trimmed.
 */
function normalizeNumericText(value) {
    const currencyJoined = value.replace(/([¥$€])\s+(?=\d)/g, (_match, symbol) => symbol);
    const decimalsJoined = currencyJoined.replace(/(\d)\s*\.\s*(\d)/g, (_match, before, after) => `${before}.${after}`);
    const rangesJoined = decimalsJoined.replace(/\s*([~-])\s*/g, (_match, separator) => separator);
    return rangesJoined.trim();
}
/**
 * Escape regular-expression metacharacters so the text matches literally.
 *
 * @param {string} value - Literal text.
 * @returns {string} Text with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed
 *   by a backslash.
 */
function escapeForRegex(value) {
    const metacharacters = /[.*+?^${}()|[\]\\]/g;
    return value.replace(metacharacters, (character) => `\\${character}`);
}
/**
 * Parse a URL and require it to live under 1688.com.
 *
 * @param {unknown} input - URL text.
 * @returns {URL} Parsed URL with tracking parameters and fragment removed.
 * @throws {ArgumentError} When the text is not a parsable URL or its host is
 *   neither `1688.com` nor a subdomain of it.
 */
function parse1688Url(input) {
    const candidate = cleanText(input);
    try {
        const parsed = new URL(candidate);
        const host = parsed.hostname;
        const isAllowedHost = host === '1688.com'
            || host === 'www.1688.com'
            || host.endsWith('.1688.com');
        if (!isAllowedHost) {
            // Routed through the catch below so both failures share one message.
            throw new Error('invalid-host');
        }
        stripTrackingParams(parsed);
        parsed.hash = '';
        return parsed;
    } catch {
        throw new ArgumentError('Invalid 1688 URL', 'Use a URL under 1688.com (for example: https://detail.1688.com/offer/887904326744.html)');
    }
}
/**
 * Non-throwing variant of `parse1688Url`.
 *
 * @param {unknown} input - URL text.
 * @returns {URL|null} Parsed URL, or `null` when `parse1688Url` throws.
 */
function parse1688UrlOrNull(input) {
    let parsed = null;
    try {
        parsed = parse1688Url(input);
    } catch {
        // Invalid input is an expected outcome here; report it as null.
    }
    return parsed;
}
/**
 * Validate that a hostname is a seller storefront under 1688.com.
 *
 * @param {unknown} hostname - Host name to check.
 * @returns {string|null} The lower-cased host, or `null` when it does not
 *   end in `.1688.com`, has an empty first label, or its first label is one
 *   of `STORE_GENERIC_HOSTS`.
 */
function normalizeStoreHost(hostname) {
    const host = cleanText(hostname).toLowerCase();
    if (!host.endsWith('.1688.com')) {
        return null;
    }
    // The suffix check guarantees at least one dot, so indexOf is never -1.
    const firstLabel = host.slice(0, host.indexOf('.'));
    const isGeneric = firstLabel === '' || STORE_GENERIC_HOSTS.has(firstLabel);
    return isGeneric ? null : host;
}
/**
 * Remove tracking query parameters from a URL in place.
 *
 * A parameter is removed when its name is in `TRACKING_QUERY_KEYS` (exact
 * match) or starts with `utm_` (case-insensitive).
 *
 * @param {URL} url - URL to mutate.
 * @returns {void}
 */
function stripTrackingParams(url) {
    const isTrackingKey = (key) => TRACKING_QUERY_KEYS.has(key) || key.toLowerCase().startsWith('utm_');
    // Collect first: deleting while iterating the live key iterator is unsafe.
    const doomedKeys = Array.from(url.searchParams.keys()).filter(isTrackingKey);
    doomedKeys.forEach((key) => {
        url.searchParams.delete(key);
    });
}
// Bundle of this module's helpers and limits exposed under one name,
// presumably for unit tests (per the `__test__` name).
export const __test__ = {
    // limits
    SEARCH_LIMIT_DEFAULT, SEARCH_LIMIT_MAX,
    // argument parsing and URL building
    parseSearchLimit, buildSearchUrl, buildDetailUrl, resolveStoreUrl,
    canonicalizeStoreUrl, canonicalizeItemUrl, canonicalizeSellerUrl,
    // identifier extraction
    extractOfferId, extractMemberId, extractShopId,
    // text parsing
    parsePriceText, normalizePriceTiers, parseMoqText,
    extractLocation, extractAddress, extractMetric,
    extractYearsOnPlatform, extractMainBusiness, extractBadges, guessTopCategories,
    // page state detection
    isCaptchaState, isLoginState,
    // generic utilities
    cleanText, cleanMultilineText, uniqueNonEmpty,
    normalizeMediaUrl, uniqueMediaSources, limitCandidates,
};
556      limitCandidates,
557  };