// shared.js
import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
/** Site identifier used by the 1688 commands. */
export const SITE = '1688';
/** 1688 home page; loaded by `ensure1688Session` to probe the session. */
export const HOME_URL = 'https://www.1688.com/';
/** Search results URL; the URI-encoded keywords are appended directly. */
export const SEARCH_URL_PREFIX = 'https://s.1688.com/selloffer/offer_search.htm?charset=utf8&keywords=';
/** Offer detail URL prefix; `<offerId>.html` is appended. */
export const DETAIL_URL_PREFIX = 'https://detail.1688.com/offer/';
/** Mobile storefront URL prefix; the `b2b-…` member id is appended. */
export const STORE_MOBILE_URL_PREFIX = 'https://winport.m.1688.com/page/index.html?memberId=';
/** Strategy label recorded in provenance records. */
export const STRATEGY = 'cookie';
/** Search limit used when the caller passes none. */
export const SEARCH_LIMIT_DEFAULT = 20;
/** Upper bound that `parseSearchLimit` clamps to. */
export const SEARCH_LIMIT_MAX = 100;

// Subdomains of 1688.com that are never treated as an individual store host.
const STORE_GENERIC_HOSTS = new Set(['www', 'detail', 's', 'winport', 'work', 'air', 'dj']);

// Query parameters removed when canonicalizing URLs (any `utm_*` key is removed as well).
const TRACKING_QUERY_KEYS = new Set([
  'spm',
  'tracelog',
  'clickid',
  'source',
  'scene',
  'from',
  'src',
  'ns',
  'cna',
  'pvid',
]);

// URL fragment whose presence marks a page as a captcha interstitial.
const CAPTCHA_URL_MARKER = '/_____tmd_____/punish';

// Title/body snippets that mark a page as a captcha interstitial.
const CAPTCHA_TEXT_PATTERNS = [
  '请拖动下方滑块完成验证',
  '请按住滑块,拖动到最右边',
  '通过验证以确保正常访问',
  '验证码拦截',
  '访问验证',
  '滑动验证',
];

// Title/body snippets that mark a page as requiring login.
const LOGIN_TEXT_PATTERNS = [
  '请登录',
  '登录后',
  '账号登录',
  '手机登录',
  '立即登录',
  '扫码登录',
  '请先完成登录',
  '请先登录后查看',
];

// URL fragments that mark a page as a login page.
const LOGIN_URL_PATTERNS = ['/member/login', 'passport', 'login.taobao.com', 'account.1688.com'];

/** Badge texts that `extractBadges` can look for to detect factory-type sellers. */
export const FACTORY_BADGE_PATTERNS = [
  '源头工厂',
  '深度验厂',
  '实力工厂',
  '工厂档案',
  '加工专区',
  '验厂报告',
  '厂家直销',
  '生产厂家',
  '工厂直供',
];

/** Badge texts that `extractBadges` can look for to detect service guarantees. */
export const SERVICE_BADGE_PATTERNS = [
  '延期必赔',
  '品质保障',
  '破损包赔',
  '退货包运费',
  '晚发必赔',
  '7*24小时响应',
  '48小时发货',
  '72小时发货',
  '后天达',
  '包邮',
  '闪电拿样',
];

// Region names used by `extractLocation` to recognise a location string.
const CHINA_LOCATIONS = [
  '北京',
  '天津',
  '上海',
  '重庆',
  '河北',
  '山西',
  '辽宁',
  '吉林',
  '黑龙江',
  '江苏',
  '浙江',
  '安徽',
  '福建',
  '江西',
  '山东',
  '河南',
  '湖北',
  '湖南',
  '广东',
  '海南',
  '四川',
  '贵州',
  '云南',
  '陕西',
  '甘肃',
  '青海',
  '台湾',
  '内蒙古',
  '广西',
  '西藏',
  '宁夏',
  '新疆',
  '香港',
  '澳门',
];

// A region name followed by up to eight CJK characters; built once instead of per call.
const LOCATION_PATTERN = new RegExp(`(${CHINA_LOCATIONS.join('|')})[\\u4e00-\\u9fa5]{0,8}`);

/**
 * Collapses all whitespace (including non-breaking spaces) to single spaces and trims.
 * @param {unknown} value
 * @returns {string} The cleaned text, or `''` when `value` is not a string.
 */
export function cleanText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value.replace(/\u00a0/g, ' ').replace(/\s+/g, ' ').trim();
}

/**
 * Like `cleanText`, but keeps line structure: each line is cleaned on its own
 * and empty lines are dropped.
 * @param {unknown} value
 * @returns {string} Cleaned lines joined by `\n`, or `''` when `value` is not a string.
 */
export function cleanMultilineText(value) {
  if (typeof value !== 'string') {
    return '';
  }
  return value
    .replace(/\u00a0/g, ' ')
    .split('\n')
    .map((line) => line.replace(/\s+/g, ' ').trim())
    .filter(Boolean)
    .join('\n');
}

/**
 * Cleans every value, drops empty results and removes duplicates, keeping first-seen order.
 * @param {unknown[]} values
 * @returns {string[]}
 */
export function uniqueNonEmpty(values) {
  const cleaned = values.map((value) => cleanText(value)).filter(Boolean);
  return [...new Set(cleaned)];
}

/**
 * Parses the `--limit` option.
 * @param {unknown} input Raw option value; `null`/`undefined` selects `SEARCH_LIMIT_DEFAULT`.
 * @returns {number} The parsed limit, capped at `SEARCH_LIMIT_MAX`.
 * @throws {ArgumentError} When the value does not parse to an integer >= 1.
 */
export function parseSearchLimit(input) {
  const parsed = Number.parseInt(String(input ?? SEARCH_LIMIT_DEFAULT), 10);
  if (!Number.isFinite(parsed) || parsed < 1) {
    throw new ArgumentError('1688 search --limit must be a positive integer', 'Example: opencli 1688 search "桌面置物架" --limit 20');
  }
  return Math.min(SEARCH_LIMIT_MAX, parsed);
}

/**
 * Builds the search results URL for a query.
 * @param {unknown} query
 * @returns {string}
 * @throws {ArgumentError} When the cleaned query is empty.
 */
export function buildSearchUrl(query) {
  const normalized = cleanText(query);
  if (!normalized) {
    throw new ArgumentError('1688 search query cannot be empty', 'Example: opencli 1688 search "桌面置物架" --limit 20');
  }
  return `${SEARCH_URL_PREFIX}${encodeURIComponent(normalized)}`;
}

/**
 * Builds the canonical detail URL from an offer id or offer URL.
 * @param {unknown} input
 * @returns {string}
 * @throws {ArgumentError} When no offer id can be extracted.
 */
export function buildDetailUrl(input) {
  const offerId = extractOfferId(input);
  if (!offerId) {
    throw new ArgumentError('1688 item expects an offer URL or offer ID', 'Example: opencli 1688 item 887904326744');
  }
  return `${DETAIL_URL_PREFIX}${offerId}.html`;
}

/**
 * Resolves user input (member id, store URL, `*.1688.com` host or bare subdomain)
 * to a store URL.
 * @param {unknown} input
 * @returns {string} The mobile storefront URL when a member id is present, otherwise `https://<store host>`.
 * @throws {ArgumentError} When the input is empty or matches none of the accepted forms.
 */
export function resolveStoreUrl(input) {
  const normalized = cleanText(input);
  if (!normalized) {
    throw new ArgumentError('1688 store expects a store URL or member ID', 'Example: opencli 1688 store https://yinuoweierfushi.1688.com/');
  }
  const memberId = extractMemberId(normalized);
  if (memberId) {
    return `${STORE_MOBILE_URL_PREFIX}${memberId}`;
  }
  if (/^https?:\/\//i.test(normalized)) {
    return canonicalizeStoreUrl(normalized);
  }
  if (normalized.endsWith('.1688.com')) {
    return canonicalizeStoreUrl(`https://${normalized}`);
  }
  if (/^[a-z0-9-]+$/i.test(normalized)) {
    // A bare word is interpreted as the store's subdomain.
    return canonicalizeStoreUrl(`https://${normalized}.1688.com`);
  }
  throw new ArgumentError('1688 store expects a store URL or member ID', 'Example: opencli 1688 store b2b-22154705262941f196');
}

/**
 * Canonicalizes a store URL.
 * @param {unknown} input A URL under 1688.com.
 * @returns {string} The mobile storefront URL when the URL carries a member id, otherwise `https://<store host>`.
 * @throws {ArgumentError} When the URL is not a 1688 URL or its host is a generic (non-store) host.
 */
export function canonicalizeStoreUrl(input) {
  const url = parse1688Url(input);
  const memberId = extractMemberId(url.toString());
  if (memberId) {
    return `${STORE_MOBILE_URL_PREFIX}${memberId}`;
  }
  const host = normalizeStoreHost(url.hostname);
  if (!host) {
    throw new ArgumentError('Invalid 1688 store URL', 'Example: opencli 1688 store https://yinuoweierfushi.1688.com/');
  }
  return `https://${host}`;
}

/**
 * Canonicalizes an item reference without throwing.
 * @param {unknown} input Offer id or URL.
 * @returns {string|null} The detail URL when an offer id is found; otherwise the 1688 URL
 *   with tracking parameters and hash removed; `null` when the input is not a 1688 URL.
 */
export function canonicalizeItemUrl(input) {
  const offerId = extractOfferId(input);
  if (offerId) {
    return `${DETAIL_URL_PREFIX}${offerId}.html`;
  }
  const url = parse1688UrlOrNull(input);
  if (!url) {
    return null;
  }
  stripTrackingParams(url);
  url.hash = '';
  return url.toString();
}

/**
 * Canonicalizes a seller reference without throwing.
 * @param {unknown} input Member id or URL.
 * @returns {string|null} Store URL, or `null` when the input cannot be mapped to a store.
 */
export function canonicalizeSellerUrl(input) {
  const memberId = extractMemberId(input);
  if (memberId) {
    return `${STORE_MOBILE_URL_PREFIX}${memberId}`;
  }
  const url = parse1688UrlOrNull(input);
  if (!url) {
    return null;
  }
  const host = normalizeStoreHost(url.hostname);
  return host ? `https://${host}` : null;
}

/**
 * Extracts an offer id (six or more digits) from a bare id, an `/offer/<id>.html`
 * path or an `offerId=` query parameter.
 * @param {unknown} input
 * @returns {string|null}
 */
export function extractOfferId(input) {
  const normalized = cleanText(input);
  if (!normalized) {
    return null;
  }
  const directId = normalized.match(/^\d{6,}$/)?.[0];
  if (directId) {
    return directId;
  }
  const detailMatch = normalized.match(/\/offer\/(\d{6,})\.html/i);
  if (detailMatch) {
    return detailMatch[1];
  }
  const queryMatch = normalized.match(/[?&]offerId=(\d{6,})/i);
  return queryMatch ? queryMatch[1] : null;
}

/**
 * Extracts a `b2b-…` member id from free text, a `memberId=` query parameter
 * or a `/winport/<id>.html` path.
 * @param {unknown} input
 * @returns {string|null}
 */
export function extractMemberId(input) {
  const normalized = cleanText(input);
  if (!normalized) {
    return null;
  }
  const direct = normalized.match(/\bb2b-[a-z0-9]+\b/i)?.[0];
  if (direct) {
    return direct;
  }
  // The anchored forms below still match when the id is followed by a word
  // character such as `_`, where the `\b`-delimited pattern above fails.
  const queryMatch = normalized.match(/[?&]memberId=(b2b-[a-z0-9]+)/i);
  if (queryMatch) {
    return queryMatch[1];
  }
  const mobileMatch = normalized.match(/\/winport\/(b2b-[a-z0-9]+)\.html/i);
  return mobileMatch ? mobileMatch[1] : null;
}

/**
 * Extracts the store subdomain from a store URL or host name.
 * @param {unknown} input
 * @returns {string|null} The first host label, or `null` when the host is not a store host.
 */
export function extractShopId(input) {
  const normalized = cleanText(input);
  if (!normalized) {
    return null;
  }
  try {
    const url = new URL(/^https?:\/\//i.test(normalized) ? normalized : `https://${normalized}`);
    const host = normalizeStoreHost(url.hostname);
    if (!host) {
      return null;
    }
    return host.split('.')[0] ?? null;
  } catch {
    // NOTE(review): only reached when URL parsing throws; a bare word such as
    // "myshop" parses as a host name and therefore yields null above.
    return /^[a-z0-9-]+$/i.test(normalized) ? normalized : null;
  }
}

/**
 * Builds a provenance record for scraped data.
 * @param {string} sourceUrl
 * @returns {{source_url: string, fetched_at: string, strategy: string}} `fetched_at` is the current time in ISO format.
 */
export function buildProvenance(sourceUrl) {
  return {
    source_url: sourceUrl,
    fetched_at: new Date().toISOString(),
    strategy: STRATEGY,
  };
}

/**
 * Parses a price label into its numeric bounds.
 * @param {unknown} text
 * @returns {{price_text: string, price_min: number|null, price_max: number|null, currency: string|null}}
 *   `price_min` is the first number found and `price_max` the last; `currency` is
 *   `'CNY'` when the text contains `¥` or `元`.
 */
export function parsePriceText(text) {
  const normalized = normalizeNumericText(cleanText(text));
  const matches = normalized.match(/\d+(?:,\d{3})*(?:\.\d+)?/g) ?? [];
  const values = matches
    .map((value) => Number.parseFloat(value.replace(/,/g, '')))
    .filter((value) => Number.isFinite(value));
  if (values.length === 0) {
    return {
      price_text: normalized,
      price_min: null,
      price_max: null,
      currency: null,
    };
  }
  const isYuan = normalized.includes('¥') || normalized.includes('元');
  return {
    price_text: normalized,
    price_min: values[0] ?? null,
    price_max: values[values.length - 1] ?? values[0] ?? null,
    currency: isYuan ? 'CNY' : null,
  };
}

/**
 * Normalizes raw price tiers (objects with `beginAmount` and `price`).
 * Tiers without any price text are dropped.
 * @param {Array<{beginAmount?: unknown, price?: unknown}>} rawTiers Non-array input yields `[]`.
 * @param {string} [unit] Unit appended to the quantity text.
 * @returns {Array<{quantity_text: string, quantity_min: number|null, price_text: string, price: number|null, currency: string|null}>}
 */
export function normalizePriceTiers(rawTiers, unit) {
  if (!Array.isArray(rawTiers)) {
    return [];
  }
  return rawTiers
    .map((tier) => {
      const rawPrice = tier?.price;
      const quantityMin = toNumber(tier?.beginAmount);
      const price = toNumber(rawPrice);
      // A numeric price has no text form of its own; derive one so the tier
      // is not discarded by the price_text filter below.
      const priceText = typeof rawPrice === 'number'
        ? (price === null ? '' : String(price))
        : cleanText(rawPrice);
      return {
        quantity_text: quantityMin !== null ? `${quantityMin}${unit ?? ''}` : '',
        quantity_min: quantityMin,
        price_text: priceText,
        price,
        currency: priceText ? 'CNY' : null,
      };
    })
    .filter((tier) => tier.price_text);
}

/**
 * Parses a minimum-order-quantity label such as `2件起批`, `≥5` or `10~99件`.
 * @param {unknown} text
 * @returns {{moq_text: string, moq_value: number|null}}
 */
export function parseMoqText(text) {
  const normalized = normalizeNumericText(cleanText(text));
  const match = normalized.match(/(\d+(?:\.\d+)?)\s*(件|个|套|箱|包|双|台|把|只|pcs|piece|pieces)?\s*起批/i)
    ?? normalized.match(/≥\s*(\d+(?:\.\d+)?)/);
  const rangeMatch = normalized.match(/(\d+(?:\.\d+)?)\s*(?:~|-|至|到)\s*\d+(?:\.\d+)?\s*(件|个|套|箱|包|双|台|把|只|pcs|piece|pieces)/i);
  const winner = match ?? rangeMatch;
  return {
    moq_text: normalized,
    moq_value: winner ? Number.parseFloat(winner[1]) : null,
  };
}

/**
 * Finds a location string. Only the text before the first `送至`/`发往` marker is
 * searched. A short line (at most 16 characters) starting with a known region wins;
 * otherwise the first region name plus up to eight following CJK characters is returned.
 * @param {unknown} text
 * @returns {string|null}
 */
export function extractLocation(text) {
  const normalized = cleanMultilineText(text);
  const primaryRegion = normalized.split(/送至|发往/)[0] ?? normalized;
  for (const line of primaryRegion.split('\n')) {
    const compact = cleanText(line);
    if (!compact || compact.length > 16) {
      continue;
    }
    if (CHINA_LOCATIONS.some((location) => compact.startsWith(location))) {
      return compact;
    }
  }
  return primaryRegion.match(LOCATION_PATTERN)?.[0] ?? null;
}

/**
 * Finds an address: the text after a `地址` label (followed by an ASCII or
 * full-width colon), or else the first line containing `省`, `市`, `区` or `县`.
 * @param {unknown} text
 * @returns {string|null}
 */
export function extractAddress(text) {
  const normalized = cleanMultilineText(text);
  // \uff1a is the full-width colon; written as an escape so it survives re-encoding.
  const lineMatch = normalized.match(/地址[:\uff1a]\s*([^\n]+)/);
  if (lineMatch) {
    return cleanText(lineMatch[1]);
  }
  const addressLine = normalized
    .split('\n')
    .map((line) => cleanText(line))
    .find((line) => line.includes('省') || line.includes('市') || line.includes('区') || line.includes('县'));
  return addressLine ?? null;
}

/**
 * Reads the value that follows `label` at the start of a line. An optional ASCII or
 * full-width colon after the label is skipped; the value may sit on the same line
 * or on the next one.
 * @param {unknown} text
 * @param {string} label Literal label; regex metacharacters are escaped.
 * @returns {string|null}
 */
export function extractMetric(text, label) {
  const normalized = cleanMultilineText(text);
  const escapedLabel = escapeForRegex(label);
  const direct = normalized.match(new RegExp(`(?:^|\\n)\\s*${escapedLabel}[:\\uff1a]?\\s*([^\\n]+)`));
  if (direct) {
    return cleanText(direct[1]);
  }
  // Defensive fallback; the pattern above already tolerates a newline between label and value.
  const lineBased = normalized.match(new RegExp(`(?:^|\\n)\\s*${escapedLabel}\\n([^\\n]+)`));
  return lineBased ? cleanText(lineBased[1]) : null;
}

/**
 * Extracts the `入驻<N>年` snippet from text.
 * @param {unknown} text
 * @returns {string|null} The matched snippet, or `null` when absent or when `text` is not a string.
 */
export function extractYearsOnPlatform(text) {
  if (typeof text !== 'string') {
    return null;
  }
  return text.match(/入驻\d+年/)?.[0] ?? null;
}

/**
 * Extracts the value of the `主营` metric, without a leading colon.
 * @param {unknown} text
 * @returns {string|null}
 */
export function extractMainBusiness(text) {
  const value = extractMetric(text, '主营');
  return value ? value.replace(/^[:\uff1a]/, '').trim() : null;
}

/**
 * Returns the candidates that occur in the text, de-duplicated, in candidate order.
 * @param {unknown} text
 * @param {string[]} candidates
 * @returns {string[]}
 */
export function extractBadges(text, candidates) {
  // Normalize once rather than once per candidate.
  const haystack = cleanMultilineText(text);
  return uniqueNonEmpty(candidates.filter((candidate) => haystack.includes(candidate)));
}

/**
 * Splits the main-business value on `、`, `,`, `/` or `|` into category names.
 * @param {unknown} text
 * @returns {string[]} Empty when no main-business value is found.
 */
export function guessTopCategories(text) {
  const mainBusiness = extractMainBusiness(text);
  if (!mainBusiness) {
    return [];
  }
  return uniqueNonEmpty(mainBusiness.split(/[、,/|]/).map((value) => value.trim()));
}

/**
 * Tells whether a page state looks like a captcha interstitial.
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state
 * @returns {boolean}
 */
export function isCaptchaState(state) {
  const href = cleanText(state.href).toLowerCase();
  if (href.includes(CAPTCHA_URL_MARKER)) {
    return true;
  }
  const title = cleanText(state.title);
  const bodyText = cleanMultilineText(state.body_text);
  return CAPTCHA_TEXT_PATTERNS.some((pattern) => title.includes(pattern) || bodyText.includes(pattern));
}

/**
 * Tells whether a page state looks like a login page or login prompt.
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state
 * @returns {boolean}
 */
export function isLoginState(state) {
  const href = cleanText(state.href).toLowerCase();
  if (LOGIN_URL_PATTERNS.some((pattern) => href.includes(pattern))) {
    return true;
  }
  const title = cleanText(state.title);
  const bodyText = cleanMultilineText(state.body_text);
  return LOGIN_TEXT_PATTERNS.some((pattern) => title.includes(pattern) || bodyText.includes(pattern));
}

/**
 * Builds the user-facing hint shown when navigation is blocked.
 * @param {string} action Name of the page/action inserted into the hint.
 * @returns {string}
 */
export function buildCaptchaHint(action) {
  return [
    `Open a clean 1688 ${action} page in the shared Chrome profile and finish any slider challenge first.`,
    'If you run opencli via CDP, set OPENCLI_CDP_TARGET=1688.com or a more specific 1688 host before retrying.',
  ].join(' ');
}

/**
 * Reads URL, title and visible body text from the page via `page.evaluate`.
 * @param {{evaluate: (script: string) => Promise<any>}} page
 * @returns {Promise<{href: string, title: string, body_text: string}>} Cleaned page state.
 */
export async function readPageState(page) {
  const result = await page.evaluate(`
    (() => ({
      href: window.location.href,
      title: document.title || '',
      body_text: document.body ? document.body.innerText || '' : '',
    }))()
  `);
  return {
    href: cleanText(result.href),
    title: cleanText(result.title),
    body_text: cleanMultilineText(result.body_text),
  };
}

/**
 * Navigates to `url`, waits, and returns the resulting page state.
 * @param {object} page Object providing `goto`, `wait` and `evaluate`.
 * @param {string} url
 * @param {number} [settleMs=2500] Passed to `page.goto` as `{ settleMs }`.
 * @param {string} [action='page'] Action name used in error messages.
 * @returns {Promise<{href: string, title: string, body_text: string}>}
 * @throws {CommandExecutionError} When the browser target was lost during navigation or reading.
 */
export async function gotoAndReadState(page, url, settleMs = 2500, action = 'page') {
  try {
    await page.goto(url, { settleMs });
    await page.wait(1.5);
    // `await` is required: a bare `return readPageState(page)` would let a
    // rejection from the read escape this try/catch untranslated.
    return await readPageState(page);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const targetLost = message.includes('Inspected target navigated or closed')
      || message.includes('Cannot find context with specified id')
      || message.includes('Target closed');
    if (targetLost) {
      throw new CommandExecutionError(`1688 ${action} navigation lost the current browser target`, `${buildCaptchaHint(action)} If CDP is attached to a stale or blocked tab, open a fresh 1688 tab and point OPENCLI_CDP_TARGET at that tab.`);
    }
    throw error;
  }
}

/**
 * Loads the 1688 home page and asserts that it shows neither a captcha nor a login prompt.
 * @param {object} page
 * @returns {Promise<void>}
 * @throws {AuthRequiredError} When the home page is blocked.
 */
export async function ensure1688Session(page) {
  const state = await gotoAndReadState(page, HOME_URL, 1500, 'homepage');
  assertAuthenticatedState(state, 'homepage');
}

/**
 * Throws when the page state is a captcha or login page.
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state
 * @param {string} action Action name included in the error message.
 * @throws {AuthRequiredError}
 */
export function assertAuthenticatedState(state, action) {
  const blocked = isCaptchaState(state) || isLoginState(state);
  if (!blocked) {
    return;
  }
  throw new AuthRequiredError('1688.com', `请先在共享 Chrome 完成 1688 登录/验证,再重试(${action})`);
}

/**
 * Same check as `assertAuthenticatedState`.
 * @param {{href?: unknown, title?: unknown, body_text?: unknown}} state
 * @param {string} action
 * @throws {AuthRequiredError}
 */
export function assertNotCaptcha(state, action) {
  assertAuthenticatedState(state, action);
}

/**
 * Converts a finite number or numeric string (commas allowed) to a number.
 * @param {unknown} value
 * @returns {number|null} `null` for anything that does not yield a finite number.
 */
export function toNumber(value) {
  if (typeof value === 'number' && Number.isFinite(value)) {
    return value;
  }
  if (typeof value !== 'string') {
    return null;
  }
  const normalized = value.replace(/,/g, '').trim();
  if (!normalized) {
    return null;
  }
  const parsed = Number.parseFloat(normalized);
  return Number.isFinite(parsed) ? parsed : null;
}

/**
 * Returns the first `limit` values; the limit is truncated and floored at 1.
 * @param {Array} values
 * @param {number} limit
 * @returns {Array}
 */
export function limitCandidates(values, limit) {
  const normalizedLimit = Math.max(1, Math.trunc(limit) || 1);
  return values.slice(0, normalizedLimit);
}

/**
 * Normalizes a media URL taken from markup or CSS: unwraps `url(...)`, strips
 * quotes, decodes `\u002F` and `&amp;`, and upgrades protocol-relative URLs to https.
 * @param {unknown} input
 * @returns {string} Absolute URL, or `''` for empty, `data:`, `blob:` or unparsable input.
 */
export function normalizeMediaUrl(input) {
  const raw = cleanText(input);
  if (!raw) {
    return '';
  }
  let value = raw
    .replace(/^url\((.*)\)$/i, '$1')
    .replace(/^['"]|['"]$/g, '')
    .replace(/\\u002F/g, '/')
    .replace(/&amp;/g, '&')
    .trim();
  if (!value || value.startsWith('data:') || value.startsWith('blob:')) {
    return '';
  }
  if (value.startsWith('//')) {
    value = `https:${value}`;
  }
  try {
    return new URL(value).toString();
  } catch {
    // Not an absolute URL: treated as "no media".
    return '';
  }
}

/**
 * Normalizes media entries and removes duplicates by `type` + normalized URL.
 * Entries whose URL normalizes to `''` are dropped.
 * @param {Array<{type?: string, url?: unknown, source?: unknown}>} values
 * @returns {Array<object>} Copies with a normalized `url` and a cleaned `source` (`undefined` when empty).
 */
export function uniqueMediaSources(values) {
  const seen = new Set();
  const result = [];
  for (const value of values) {
    const url = normalizeMediaUrl(value.url);
    if (!url) {
      continue;
    }
    const key = `${value.type}:${url}`;
    if (seen.has(key)) {
      continue;
    }
    seen.add(key);
    result.push({
      ...value,
      url,
      source: cleanText(value.source) || undefined,
    });
  }
  return result;
}

// Tightens spacing around currency symbols, decimal points and range separators.
function normalizeNumericText(value) {
  return value
    .replace(/([¥$€])\s+(?=\d)/g, '$1')
    .replace(/(\d)\s*\.\s*(\d)/g, '$1.$2')
    .replace(/\s*([~-])\s*/g, '$1')
    .trim();
}

// Escapes regex metacharacters so `value` can be embedded literally in a RegExp.
function escapeForRegex(value) {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// Parses a URL under 1688.com and strips tracking parameters and hash.
// Throws ArgumentError for unparsable input or a foreign host.
function parse1688Url(input) {
  const normalized = cleanText(input);
  try {
    const url = new URL(normalized);
    const { hostname } = url;
    if (hostname !== '1688.com' && !hostname.endsWith('.1688.com')) {
      throw new Error('invalid-host');
    }
    stripTrackingParams(url);
    url.hash = '';
    return url;
  } catch {
    throw new ArgumentError('Invalid 1688 URL', 'Use a URL under 1688.com (for example: https://detail.1688.com/offer/887904326744.html)');
  }
}

// Non-throwing variant of parse1688Url: returns null for invalid input.
function parse1688UrlOrNull(input) {
  try {
    return parse1688Url(input);
  } catch {
    return null;
  }
}

// Returns the lower-cased host when it is a store subdomain of 1688.com, otherwise null.
function normalizeStoreHost(hostname) {
  const lower = cleanText(hostname).toLowerCase();
  if (!lower.endsWith('.1688.com')) {
    return null;
  }
  const [subdomain] = lower.split('.');
  if (!subdomain || STORE_GENERIC_HOSTS.has(subdomain)) {
    return null;
  }
  return lower;
}

// Removes known tracking keys and any `utm_*` key from the URL's query, in place.
function stripTrackingParams(url) {
  // Snapshot the keys first: deleting while iterating the live view is unsafe.
  const keys = [...url.searchParams.keys()];
  for (const key of keys) {
    if (TRACKING_QUERY_KEYS.has(key) || key.toLowerCase().startsWith('utm_')) {
      url.searchParams.delete(key);
    }
  }
}

/** Internals re-exported for unit tests. */
export const __test__ = {
  SEARCH_LIMIT_DEFAULT,
  SEARCH_LIMIT_MAX,
  parseSearchLimit,
  buildSearchUrl,
  buildDetailUrl,
  resolveStoreUrl,
  canonicalizeStoreUrl,
  canonicalizeItemUrl,
  canonicalizeSellerUrl,
  extractOfferId,
  extractMemberId,
  extractShopId,
  parsePriceText,
  normalizePriceTiers,
  parseMoqText,
  extractLocation,
  extractAddress,
  extractMetric,
  extractYearsOnPlatform,
  extractMainBusiness,
  extractBadges,
  guessTopCategories,
  isCaptchaState,
  isLoginState,
  cleanText,
  cleanMultilineText,
  uniqueNonEmpty,
  normalizeMediaUrl,
  uniqueMediaSources,
  limitCandidates,
};