// item.js
import { CommandExecutionError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import { isRecord } from '@jackwener/opencli/utils';
import {
    assertAuthenticatedState,
    buildDetailUrl,
    buildProvenance,
    canonicalizeSellerUrl,
    cleanMultilineText,
    cleanText,
    extractLocation,
    extractMemberId,
    extractOfferId,
    extractShopId,
    gotoAndReadState,
    normalizePriceTiers,
    parseMoqText,
    parsePriceText,
    toNumber,
    uniqueNonEmpty,
} from './shared.js';

/**
 * Returns `value` when it is an array, otherwise an empty array.
 *
 * Page-derived JSON is not guaranteed to have the expected shape; spreading a
 * string would yield single characters and spreading a plain object throws.
 *
 * @param {unknown} value - Candidate list read from the page payload.
 * @returns {unknown[]} The same array, or `[]` for any non-array input.
 */
function asArray(value) {
    return Array.isArray(value) ? value : [];
}

/**
 * Converts the raw payload captured by `readItemPayload` into the flat record
 * returned by the `item` command.
 *
 * Structured model fields are preferred; the page's visible text (`bodyText`)
 * is used as a fallback for title, price, MOQ and the keyword-based fields.
 *
 * @param {object} payload - Object with `href`, `title`, `bodyText`,
 *   `offerTitle`, `offerId`, `seller`, `trade`, `gallery`, `shipping` and
 *   `services` (any of the nested values may be null).
 * @returns {object} Normalized item record, including the provenance fields
 *   produced by `buildProvenance`.
 */
function normalizeItemPayload(payload) {
    const href = cleanText(payload.href);
    const bodyText = cleanMultilineText(payload.bodyText);
    const sellerName = cleanText(payload.seller?.companyName);
    const sellerUrlRaw = cleanText(payload.seller?.winportUrl
        ?? payload.seller?.sellerWinportUrlMap?.defaultUrl
        ?? payload.seller?.sellerWinportUrlMap?.indexUrl);
    const sellerUrl = canonicalizeSellerUrl(sellerUrlRaw);
    const offerId = cleanText(String(payload.offerId ?? '')) || extractOfferId(href) || null;
    const memberId = cleanText(payload.seller?.memberId) || extractMemberId(sellerUrlRaw || href) || null;
    const shopId = extractShopId(sellerUrl ?? href);
    const unit = cleanText(payload.trade?.unit);
    const priceDisplay = cleanText(payload.trade?.priceDisplay);
    // A structured price display wins; otherwise the price is parsed out of the page text.
    const priceRange = parsePriceText(priceDisplay ? `¥${priceDisplay}` : bodyText);
    const moqText = extractMoqText(bodyText, payload.trade?.beginAmount, unit);
    const moq = parseMoqText(moqText);
    const services = uniqueServices(payload);
    const serviceBadges = uniqueNonEmpty(services.map((service) => cleanText(service.serviceName)));
    const attributes = normalizeVisibleAttributes(payload.trade?.offerIDatacenterSellInfo);
    const priceTiers = normalizePriceTiers(payload.trade?.offerPriceModel?.currentPrices ?? [], unit || null);
    // Guard every gallery list: non-array values and null entries must not crash or
    // leak garbage into the image list.
    const images = uniqueNonEmpty([
        ...asArray(payload.gallery?.mainImage),
        ...asArray(payload.gallery?.offerImgList),
        ...asArray(payload.gallery?.wlImageInfos).map((item) => item?.fullPathImageURI ?? ''),
    ]);
    const detailUrl = offerId ? buildDetailUrl(offerId) : href;
    const provenance = buildProvenance(href || detailUrl);
    return {
        offer_id: offerId,
        member_id: memberId,
        shop_id: shopId,
        title: cleanText(payload.offerTitle) || stripAlibabaSuffix(payload.title) || firstNonEmptyLine(bodyText) || null,
        item_url: detailUrl,
        main_images: images,
        price_text: priceRange.price_text || null,
        price_tiers: priceTiers,
        currency: priceRange.currency,
        moq_text: moq.moq_text || null,
        moq_value: moq.moq_value,
        seller_name: sellerName || null,
        seller_url: sellerUrl,
        shop_name: sellerName || null,
        origin_place: extractLocation(bodyText),
        delivery_days_text: extractDeliveryDaysText(bodyText, services, payload.shipping),
        customization_text: extractKeywordLine(bodyText, ['来样定制', '来图定制', '支持定制', '可定制', '定制']),
        private_label_text: extractKeywordLine(bodyText, ['贴牌', '贴标', '定制logo', '打logo', 'OEM', 'ODM']),
        visible_attributes: attributes,
        sales_text: extractSalesText(bodyText),
        service_badges: serviceBadges,
        stock_quantity: extractStockQuantity(bodyText),
        ...provenance,
    };
}

/**
 * Flattens a key/value attribute record into `{ key, value }` pairs.
 *
 * The `sellPointModel` key is always excluded. Entries whose value is
 * null/undefined or a nested record are skipped as well, because stringifying
 * them would emit the literal text "null" / "[object Object]".
 *
 * @param {unknown} raw - Attribute record (anything else yields `[]`).
 * @returns {{ key: string, value: string }[]} Cleaned, non-empty pairs in
 *   the record's own enumeration order.
 */
function normalizeVisibleAttributes(raw) {
    if (!isRecord(raw)) {
        return [];
    }
    const attributes = [];
    for (const [rawKey, rawValue] of Object.entries(raw)) {
        if (rawKey === 'sellPointModel') {
            continue;
        }
        if (rawValue == null || isRecord(rawValue)) {
            continue;
        }
        const key = cleanText(rawKey);
        const value = cleanText(String(rawValue));
        if (key && value) {
            attributes.push({ key, value });
        }
    }
    return attributes;
}

/**
 * Merges the service lists found on the payload and removes duplicates by
 * cleaned `serviceName`, keeping the first occurrence.
 *
 * @param {object} payload - Raw item payload; reads `services`,
 *   `shipping.protectionInfos` and `shipping.buyerProtectionModel`.
 * @returns {object[]} Service objects that have a non-empty, unique name.
 */
function uniqueServices(payload) {
    const combined = [
        ...asArray(payload.services),
        ...asArray(payload.shipping?.protectionInfos),
        ...asArray(payload.shipping?.buyerProtectionModel),
    ];
    const seen = new Set();
    const result = [];
    for (const service of combined) {
        // JSON-derived lists may contain null or primitive entries; skip them
        // instead of throwing on the property access below.
        if (!isRecord(service)) {
            continue;
        }
        const key = cleanText(service.serviceName);
        if (!key || seen.has(key)) {
            continue;
        }
        seen.add(key);
        result.push(service);
    }
    return result;
}

/**
 * Removes a trailing " - 阿里巴巴" marker from a page title.
 *
 * @param {unknown} title - Raw document title.
 * @returns {string} Cleaned title without the suffix.
 */
function stripAlibabaSuffix(title) {
    return cleanText(title).replace(/\s*-\s*阿里巴巴$/, '').trim();
}

/**
 * Returns the first line of `text` that is non-empty after cleaning.
 *
 * @param {string} text - Multi-line text.
 * @returns {string} The cleaned line, or `''` when every line is empty.
 */
function firstNonEmptyLine(text) {
    return text.split('\n').map((line) => cleanText(line)).find(Boolean) ?? '';
}

/**
 * Finds the minimum-order ("起批") phrase for the item.
 *
 * The visible page text is searched first; when it has no such phrase the
 * text is synthesized from the structured `beginAmount` and `unit`.
 *
 * @param {string} bodyText - Visible page text.
 * @param {unknown} beginAmount - Structured minimum order amount.
 * @param {string} unit - Unit label appended to the synthesized text.
 * @returns {string} The MOQ phrase, or `''` when none can be determined.
 */
function extractMoqText(bodyText, beginAmount, unit) {
    const lineMatch = bodyText.match(/\d+(?:\.\d+)?\s*(件|个|套|箱|包|双|台|把|只)\s*起批/);
    if (lineMatch) {
        return lineMatch[0];
    }
    const moqValue = toNumber(beginAmount);
    if (moqValue !== null) {
        return `${moqValue}${unit || ''}起批`;
    }
    return '';
}

/**
 * Determines the delivery-time text, trying in order: the shipping model's
 * own text, a "N 小时/天 (内) 发货" phrase in the page text, and finally the
 * first service that carries a numeric `agreeDeliveryHours`.
 *
 * @param {string} bodyText - Visible page text.
 * @param {object[]} services - Service objects (see `uniqueServices`).
 * @param {object|null|undefined} shipping - Shipping model from the payload.
 * @returns {string|null} Delivery text, or `null` when nothing matches.
 */
function extractDeliveryDaysText(bodyText, services, shipping) {
    const shippingText = cleanText(shipping?.deliveryLimitText) || cleanText(shipping?.logisticsText);
    if (shippingText) {
        return shippingText;
    }
    const textMatch = bodyText.match(/\d+\s*(?:小时|天)(?:内)?发货/);
    if (textMatch) {
        return textMatch[0];
    }
    // Optional chaining keeps a null entry in a caller-supplied list from throwing.
    const hourMatch = services.find((service) => typeof service?.agreeDeliveryHours === 'number');
    if (hourMatch) {
        return `${hourMatch.agreeDeliveryHours}小时内发货`;
    }
    return null;
}

/**
 * Returns the first non-empty line of `bodyText` containing any keyword.
 *
 * @param {string} bodyText - Visible page text.
 * @param {string[]} keywords - Substrings to look for (case-sensitive).
 * @returns {string|null} The cleaned matching line, or `null`.
 */
function extractKeywordLine(bodyText, keywords) {
    const lines = bodyText.split('\n').map((line) => cleanText(line)).filter(Boolean);
    for (const line of lines) {
        if (keywords.some((keyword) => line.includes(keyword))) {
            return line;
        }
    }
    return null;
}

/**
 * Extracts a sales phrase such as "已售 100+ 件" or "全网销量 2000" from the page text.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {string|null} Cleaned sales phrase, or `null` when absent.
 */
function extractSalesText(bodyText) {
    const match = bodyText.match(/(?:全网销量|已售)\s*\d+(?:\.\d+)?\+?\s*[件套个单]?/);
    return match ? cleanText(match[0]) : null;
}

/**
 * Extracts the integer that follows "库存" in the page text.
 *
 * @param {string} bodyText - Visible page text.
 * @returns {number|null} Parsed stock quantity, or `null` when absent.
 */
function extractStockQuantity(bodyText) {
    const match = bodyText.match(/库存\s*(\d+)/);
    return match ? Number.parseInt(match[1], 10) : null;
}

/**
 * Navigates to the item page, checks the authenticated state and reads the
 * item context exposed by the page (`window.context`) together with the
 * document title and visible text.
 *
 * @param {object} page - Browser page handle providing `evaluate`.
 * @param {string} itemUrl - Detail page URL to open.
 * @returns {Promise<object>} Raw payload suitable for `normalizeItemPayload`.
 * @throws {CommandExecutionError} When the page returns no usable payload or
 *   no offer ID can be resolved from the payload or the URL.
 */
async function readItemPayload(page, itemUrl) {
    // NOTE(review): 2500 is presumably a settle/wait time in ms — confirm against gotoAndReadState.
    const state = await gotoAndReadState(page, itemUrl, 2500, 'item');
    assertAuthenticatedState(state, 'item');
    const payload = await page.evaluate(`
    (() => {
      const root = window.context ?? {};
      const model = root.result?.global?.globalData?.model ?? null;
      const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
      return {
        href: window.location.href,
        title: document.title || '',
        bodyText: document.body ? document.body.innerText || '' : '',
        offerTitle: model?.offerTitleModel?.subject ?? '',
        offerId: model?.tradeModel?.offerId ?? '',
        seller: toJson(model?.sellerModel),
        trade: toJson(model?.tradeModel),
        gallery: toJson(root.result?.data?.gallery?.fields ?? null),
        shipping: toJson(root.result?.data?.shippingServices?.fields ?? null),
        services: toJson(root.result?.data?.shippingServices?.fields?.protectionInfos ?? []),
      };
    })()
  `);
    // A missing/non-object evaluate result is reported with the same domain error
    // as a missing offer ID instead of surfacing as a raw TypeError.
    const resolvedOfferId = isRecord(payload)
        ? cleanText(String(payload.offerId ?? '')) || extractOfferId(cleanText(payload.href))
        : null;
    if (!resolvedOfferId) {
        throw new CommandExecutionError('1688 item page did not expose product context', '当前 tab 非商品详情上下文,请切到 detail.1688.com 商品页并重试');
    }
    return payload;
}

cli({
    site: '1688',
    name: 'item',
    description: '1688 商品详情(公开商品字段、价格阶梯、卖家基础信息)',
    domain: 'www.1688.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: [
        {
            name: 'input',
            required: true,
            positional: true,
            help: '1688 商品 URL 或 offer ID(如 887904326744)',
        },
    ],
    columns: ['offer_id', 'title', 'price_text', 'moq_text', 'seller_name', 'origin_place'],
    func: async (page, kwargs) => {
        const itemUrl = buildDetailUrl(String(kwargs.input ?? ''));
        const payload = await readItemPayload(page, itemUrl);
        return [normalizeItemPayload(payload)];
    },
});

// Internal helpers exposed for unit tests only.
export const __test__ = {
    normalizeItemPayload,
    normalizeVisibleAttributes,
    stripAlibabaSuffix,
    extractMoqText,
    extractDeliveryDaysText,
    extractKeywordLine,
    extractSalesText,
    extractStockQuantity,
};