store.js
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import { FACTORY_BADGE_PATTERNS, SERVICE_BADGE_PATTERNS, assertAuthenticatedState, buildDetailUrl, buildProvenance, canonicalizeSellerUrl, canonicalizeStoreUrl, cleanMultilineText, cleanText, extractAddress, extractBadges, extractMemberId, extractMetric, extractOfferId, extractShopId, extractYearsOnPlatform, gotoAndReadState, guessTopCategories, resolveStoreUrl, uniqueNonEmpty, } from './shared.js';

/**
 * Build the single output record of the `store` command from the scraped payloads.
 *
 * Text fields are extracted from the body text of the contact page, the store
 * page and the seed item page (joined in that order); structured seller data
 * from the seed item takes precedence where it is available.
 *
 * @param {object} input
 * @param {string} input.resolvedUrl - Store URL resolved from the CLI argument.
 * @param {object|null|undefined} input.storePayload - Result of `readStorePayload` for the store page.
 * @param {object|null|undefined} input.contactPayload - Result of `readStorePayload` for the contact page, if any.
 * @param {object|null|undefined} input.seed - Result of `readItemSeed`, if any.
 * @param {string|null|undefined} [input.explicitMemberId] - Member id parsed from the raw CLI argument.
 * @returns {object} Flat record with the store fields plus whatever `buildProvenance` contributes.
 */
function normalizeStorePayload(input) {
    const storePayload = input.storePayload;
    const contactPayload = input.contactPayload;
    const seed = input.seed;
    const contactText = cleanMultilineText(contactPayload?.bodyText);
    const storeText = cleanMultilineText(storePayload?.bodyText);
    const seedText = cleanMultilineText(seed?.bodyText);
    const combinedText = [contactText, storeText, seedText].filter(Boolean).join('\n');
    // Fall through on *empty* values, not only on null/undefined: the page model
    // can expose an empty-string winportUrl, which must not mask the later candidates.
    // NOTE(review): assumes cleanText returns a falsy value for blank input — confirm in shared.js.
    const sellerUrlRaw = cleanText(seed?.seller?.winportUrl)
        || cleanText(seed?.seller?.sellerWinportUrlMap?.defaultUrl)
        || cleanText(storePayload?.href)
        || cleanText(input.resolvedUrl);
    const storeUrl = safeCanonicalStoreUrl(sellerUrlRaw || input.resolvedUrl) ?? input.resolvedUrl;
    const sellerUrl = canonicalizeSellerUrl(sellerUrlRaw) ?? storeUrl;
    const companyUrl = pickCompanyUrl(contactPayload?.href, storeUrl);
    const memberId = cleanText(seed?.seller?.memberId)
        || input.explicitMemberId
        || extractMemberId(input.resolvedUrl)
        || extractMemberId(storePayload?.href ?? '')
        || null;
    const shopId = extractShopId(sellerUrl) ?? extractShopId(storeUrl);
    const companyName = cleanText(seed?.seller?.companyName)
        || firstNamedLine(contactText)
        || firstNamedLine(storeText)
        || null;
    // `seed.services` is copied verbatim out of the page model, so do not trust
    // it to be an array (or its entries to be objects) before mapping over it.
    const seedServices = Array.isArray(seed?.services) ? seed.services : [];
    const serviceBadges = uniqueNonEmpty([
        ...extractBadges(combinedText, SERVICE_BADGE_PATTERNS),
        ...seedServices.map((service) => cleanText(service?.serviceName)),
    ]);
    const factoryBadges = extractBadges(combinedText, FACTORY_BADGE_PATTERNS);
    return {
        member_id: memberId,
        shop_id: shopId,
        store_name: companyName,
        store_url: storeUrl,
        company_name: companyName,
        company_url: companyUrl,
        business_model_text: firstMetric(combinedText, ['经营模式', '生产加工', '主营产品']),
        years_on_platform_text: extractYearsOnPlatform(combinedText),
        location: extractAddress(contactText) ?? extractAddress(storeText),
        staff_size_text: firstMetric(combinedText, ['员工人数', '员工总数']),
        factory_badges: factoryBadges,
        service_badges: serviceBadges,
        response_rate_text: firstMetric(combinedText, ['响应率', '回复率', '响应速度']),
        return_rate_text: extractReturnRate(combinedText),
        top_categories: guessTopCategories(combinedText),
        phone_text: extractMetric(contactText, '电话'),
        mobile_text: extractMetric(contactText, '手机'),
        ...buildProvenance(cleanText(contactPayload?.href) || cleanText(storePayload?.href) || input.resolvedUrl),
    };
}

/**
 * Call `canonicalizeStoreUrl` without letting it throw.
 *
 * @param {string} url - Candidate store URL.
 * @returns {*} Whatever `canonicalizeStoreUrl` returns, or `null` if it throws.
 */
function safeCanonicalStoreUrl(url) {
    try {
        return canonicalizeStoreUrl(url);
    }
    catch {
        return null;
    }
}

/**
 * Choose the contact-info URL reported as `company_url`.
 *
 * Prefers the URL of the contact page that was actually loaded; falls back to
 * one derived from the store URL.
 *
 * @param {string|null|undefined} contactHref - `href` of the loaded contact page, if any.
 * @param {string} storeUrl - Store URL used as fallback.
 * @returns {string|null} Contact-info URL, or `null` if neither input is a 1688 subdomain URL.
 */
function pickCompanyUrl(contactHref, storeUrl) {
    const fromPage = cleanText(contactHref);
    if (fromPage) {
        const normalized = buildContactUrl(fromPage);
        if (normalized)
            return normalized;
    }
    return buildContactUrl(storeUrl);
}

/**
 * Derive the `/page/contactinfo.html` URL on the same host as the given URL.
 *
 * @param {string} storeUrl - Any absolute URL on the shop's host.
 * @returns {string|null} `<protocol>//<hostname>/page/contactinfo.html`, or `null`
 *   when the input is not a parseable URL or its hostname does not end with `.1688.com`.
 */
function buildContactUrl(storeUrl) {
    try {
        const parsed = new URL(storeUrl);
        if (!parsed.hostname.endsWith('.1688.com'))
            return null;
        return `${parsed.protocol}//${parsed.hostname}/page/contactinfo.html`;
    }
    catch {
        return null;
    }
}

/**
 * Find the first line that looks like a company name, i.e. contains
 * '有限公司', '商行' or '工厂'.
 *
 * @param {string|null|undefined} text - Multi-line text; nullish is treated as empty.
 * @returns {string|null} The cleaned matching line, or `null` if there is none.
 */
function firstNamedLine(text) {
    return String(text ?? '')
        .split('\n')
        .map((line) => cleanText(line))
        .find((line) => line.includes('有限公司') || line.includes('商行') || line.includes('工厂'))
        ?? null;
}

/**
 * Return the first truthy `extractMetric` result among several candidate labels.
 *
 * @param {string} text - Text to search.
 * @param {string[]} labels - Labels to try, in priority order.
 * @returns {*} The first truthy value returned by `extractMetric`, or `null`.
 */
function firstMetric(text, labels) {
    for (const label of labels) {
        const value = extractMetric(text, label);
        if (value)
            return value;
    }
    return null;
}

/**
 * Extract the repeat-purchase rate ("回头率") as display text.
 *
 * Two layouts are recognised: label and percentage on the same line, and the
 * label on one line with the percentage on a following line.
 *
 * @param {string} text - Text to search.
 * @returns {string|null} For the same-line layout, the cleaned matched text
 *   (label, any spacing, percentage); for the split layout, `回头率` immediately
 *   followed by the percentage; `null` when neither layout is found.
 */
function extractReturnRate(text) {
    // `[^\S\n]` is "whitespace except newline". Plain `\s*` would also swallow
    // line breaks and make the multiline branch below unreachable.
    const inline = text.match(/回头率[^\S\n]*([0-9.]+%)/);
    if (inline)
        return cleanText(inline[0]);
    const multiline = text.match(/回头率\s*\n\s*([0-9.]+%)/);
    if (!multiline)
        return null;
    return `回头率${cleanText(multiline[1])}`;
}

/**
 * Return the first offer id that `extractOfferId` can find in a list of links.
 *
 * @param {Iterable<string>} links - Candidate URLs.
 * @returns {*} The first truthy `extractOfferId` result, or `null`.
 */
function firstOfferId(links) {
    for (const link of links) {
        const offerId = extractOfferId(link);
        if (offerId)
            return offerId;
    }
    return null;
}

/**
 * Return the contact-info URL for the first link that is on a 1688 subdomain.
 *
 * @param {Iterable<string>} links - Candidate URLs.
 * @returns {string|null} The first non-null `buildContactUrl` result, or `null`.
 */
function firstContactUrl(links) {
    for (const link of links) {
        const url = buildContactUrl(link);
        if (url)
            return url;
    }
    return null;
}

/**
 * Navigate to a store-related page and snapshot what is needed from it.
 *
 * @param {object} page - Browser page handle providing `evaluate`.
 * @param {string} url - Page to open.
 * @param {string} action - Label forwarded to `gotoAndReadState` and `assertAuthenticatedState`.
 * @returns {Promise<object>} `{ href, title, bodyText, offerLinks, contactLinks }` as read in the page.
 */
async function readStorePayload(page, url, action) {
    // NOTE(review): 2500 is presumably a settle/wait time in ms — confirm against gotoAndReadState.
    const state = await gotoAndReadState(page, url, 2500, action);
    assertAuthenticatedState(state, action);
    return await page.evaluate(`
    (() => ({
      href: window.location.href,
      title: document.title || '',
      bodyText: document.body ? document.body.innerText || '' : '',
      offerLinks: Array.from(document.querySelectorAll('a[href*="detail.1688.com/offer/"], a[href*="offerId="]'))
        .map((anchor) => anchor.href)
        .filter(Boolean),
      contactLinks: Array.from(document.querySelectorAll('a[href*="contactinfo"]'))
        .map((anchor) => anchor.href)
        .filter(Boolean),
    }))()
  `);
}

/**
 * Open an item detail page and read the seller data embedded in its page model.
 *
 * @param {object} page - Browser page handle providing `evaluate`.
 * @param {string} offerId - Offer id used to build the detail URL.
 * @returns {Promise<object>} `{ href, bodyText, seller, services }` as read in the page.
 * @throws {CommandExecutionError} When the page exposes neither a seller member id
 *   nor a seller winport URL.
 */
async function readItemSeed(page, offerId) {
    const itemUrl = buildDetailUrl(offerId);
    const state = await gotoAndReadState(page, itemUrl, 2500, 'store seed item');
    assertAuthenticatedState(state, 'store seed item');
    // The JSON round-trip inside the page turns the model fragments into plain
    // data (and turns a missing value into null) before they are returned.
    const seed = await page.evaluate(`
    (() => {
      const model = window.context?.result?.global?.globalData?.model ?? null;
      const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
      return {
        href: window.location.href,
        bodyText: document.body ? document.body.innerText || '' : '',
        seller: toJson(model?.sellerModel),
        services: toJson(model?.shippingServices?.fields?.buyerProtectionModel ?? []),
      };
    })()
  `);
    const hasSellerContext = !!cleanText(seed?.seller?.memberId) || !!cleanText(seed?.seller?.winportUrl);
    if (!hasSellerContext) {
        throw new CommandExecutionError('1688 store seed item did not expose seller context', '当前 tab 非商品详情上下文,请切到 detail.1688.com 商品页并重试');
    }
    return seed;
}

/**
 * Tell whether any of the visited pages produced non-empty body text.
 *
 * @param {object|null|undefined} storePayload
 * @param {object|null|undefined} contactPayload
 * @param {object|null|undefined} seed
 * @returns {boolean} `true` if at least one `bodyText` is non-empty after `cleanText`.
 */
function hasAnyEvidence(storePayload, contactPayload, seed) {
    return !!cleanText(storePayload?.bodyText)
        || !!cleanText(contactPayload?.bodyText)
        || !!cleanText(seed?.bodyText);
}

// Registers the `1688 store` command: reads the store page, its contact page and
// (when an offer id can be found) one item page, then emits a single record.
cli({
    site: '1688',
    name: 'store',
    description: '1688 店铺/供应商公开信息(联系方式、主营、入驻年限、公开服务信号)',
    domain: 'www.1688.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: [
        {
            name: 'input',
            required: true,
            positional: true,
            help: '1688 店铺 URL 或 member ID(如 b2b-22154705262941f196)',
        },
    ],
    columns: ['store_name', 'years_on_platform_text', 'location', 'return_rate_text'],
    func: async (page, kwargs) => {
        const rawInput = String(kwargs.input ?? '');
        const resolvedUrl = resolveStoreUrl(rawInput);
        const explicitMemberId = extractMemberId(rawInput);
        const storePayload = await readStorePayload(page, resolvedUrl, 'store');
        // Prefer a contact link found on the store page; otherwise derive one from the host.
        const contactUrl = firstContactUrl(storePayload.contactLinks ?? []) || buildContactUrl(storePayload.href || resolvedUrl);
        const contactPayload = contactUrl ? await readStorePayload(page, contactUrl, 'store contact') : null;
        const offerId = extractOfferId(rawInput)
            || firstOfferId(storePayload.offerLinks ?? [])
            || firstOfferId(contactPayload?.offerLinks ?? []);
        let seed = null;
        if (offerId) {
            try {
                seed = await readItemSeed(page, offerId);
            }
            catch (error) {
                // The item seed is optional enrichment: a CommandExecutionError
                // from it is deliberately ignored and the store/contact pages are
                // used alone. Any other error type still propagates.
                if (!(error instanceof CommandExecutionError))
                    throw error;
            }
        }
        if (!hasAnyEvidence(storePayload, contactPayload, seed)) {
            throw new EmptyResultError('1688 store', 'Store page is reachable but no visible fields were extracted. Open the store page in Chrome and retry.');
        }
        return [
            normalizeStorePayload({
                resolvedUrl,
                storePayload,
                contactPayload,
                seed,
                explicitMemberId,
            }),
        ];
    },
});

// Internal helpers exposed for unit tests only.
export const __test__ = {
    normalizeStorePayload,
    safeCanonicalStoreUrl,
    buildContactUrl,
    firstNamedLine,
    firstMetric,
    extractReturnRate,
    firstOfferId,
    firstContactUrl,
};