/ clis / 1688 / store.js
store.js
  1  import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
  2  import { cli, Strategy } from '@jackwener/opencli/registry';
  3  import { FACTORY_BADGE_PATTERNS, SERVICE_BADGE_PATTERNS, assertAuthenticatedState, buildDetailUrl, buildProvenance, canonicalizeSellerUrl, canonicalizeStoreUrl, cleanMultilineText, cleanText, extractAddress, extractBadges, extractMemberId, extractMetric, extractOfferId, extractShopId, extractYearsOnPlatform, gotoAndReadState, guessTopCategories, resolveStoreUrl, uniqueNonEmpty, } from './shared.js';
  /**
   * Merge the store-page, contact-page and optional item-seed payloads into one flat store row.
   *
   * Seed values win where present; text-derived fields are looked up in the contact page
   * text before the store page text.
   *
   * @param {object} input
   * @param {string} input.resolvedUrl - Store URL resolved from the CLI argument; used as the last fallback.
   * @param {object|null} input.storePayload - Payload read from the store page (`href`, `bodyText` are used here).
   * @param {object|null} input.contactPayload - Payload read from the contact page (`href`, `bodyText` are used here).
   * @param {object|null} input.seed - Item-page seed (`bodyText`, `seller`, `services` are used here).
   * @param {string|null} input.explicitMemberId - Member ID parsed from the raw CLI argument, if any.
   * @returns {object} Row with snake_case store fields plus the fields returned by `buildProvenance`.
   */
  function normalizeStorePayload(input) {
      const { storePayload, contactPayload, seed } = input;
      const contactText = cleanMultilineText(contactPayload?.bodyText);
      const storeText = cleanMultilineText(storePayload?.bodyText);
      const seedText = cleanMultilineText(seed?.bodyText);
      const combinedText = [contactText, storeText, seedText].filter(Boolean).join('\n');
      // Clean each candidate before falling through, so a blank (but non-null)
      // winport URL does not mask the later candidates.
      const sellerUrlRaw = cleanText(seed?.seller?.winportUrl)
          || cleanText(seed?.seller?.sellerWinportUrlMap?.defaultUrl)
          || cleanText(storePayload?.href)
          || cleanText(input.resolvedUrl);
      const storeUrl = safeCanonicalStoreUrl(sellerUrlRaw || input.resolvedUrl) ?? input.resolvedUrl;
      const sellerUrl = canonicalizeSellerUrl(sellerUrlRaw) ?? storeUrl;
      const companyUrl = pickCompanyUrl(contactPayload?.href, storeUrl);
      const memberId = cleanText(seed?.seller?.memberId)
          || input.explicitMemberId
          || extractMemberId(input.resolvedUrl)
          || extractMemberId(storePayload?.href ?? '')
          || null;
      const shopId = extractShopId(sellerUrl) ?? extractShopId(storeUrl);
      const companyName = cleanText(seed?.seller?.companyName)
          || firstNamedLine(contactText)
          || firstNamedLine(storeText)
          || null;
      // The seed comes from page-owned data, so its shape is not guaranteed:
      // ignore a non-array `services` and tolerate null entries instead of throwing.
      const seedServices = Array.isArray(seed?.services) ? seed.services : [];
      const serviceBadges = uniqueNonEmpty([
          ...extractBadges(combinedText, SERVICE_BADGE_PATTERNS),
          ...seedServices.map((service) => cleanText(service?.serviceName)),
      ]);
      const factoryBadges = extractBadges(combinedText, FACTORY_BADGE_PATTERNS);
      return {
          member_id: memberId,
          shop_id: shopId,
          // store_name and company_name are intentionally the same value.
          store_name: companyName,
          store_url: storeUrl,
          company_name: companyName,
          company_url: companyUrl,
          business_model_text: firstMetric(combinedText, ['经营模式', '生产加工', '主营产品']),
          years_on_platform_text: extractYearsOnPlatform(combinedText),
          location: extractAddress(contactText) ?? extractAddress(storeText),
          staff_size_text: firstMetric(combinedText, ['员工人数', '员工总数']),
          factory_badges: factoryBadges,
          service_badges: serviceBadges,
          response_rate_text: firstMetric(combinedText, ['响应率', '回复率', '响应速度']),
          return_rate_text: extractReturnRate(combinedText),
          top_categories: guessTopCategories(combinedText),
          phone_text: extractMetric(contactText, '电话'),
          mobile_text: extractMetric(contactText, '手机'),
          ...buildProvenance(cleanText(contactPayload?.href) || cleanText(storePayload?.href) || input.resolvedUrl),
      };
  }
 /**
  * Non-throwing wrapper around `canonicalizeStoreUrl`.
  *
  * @param {string} url - Candidate store URL.
  * @returns {*} Whatever `canonicalizeStoreUrl` returns, or `null` if it throws.
  */
 function safeCanonicalStoreUrl(url) {
     let canonical = null;
     try {
         canonical = canonicalizeStoreUrl(url);
     }
     catch {
         // Any failure is reported as "no canonical form"; `canonical` stays null.
     }
     return canonical;
 }
 /**
  * Choose the contact-info URL to report for the company.
  *
  * @param {string|null|undefined} contactHref - URL of the contact page that was actually read, if any.
  * @param {string} storeUrl - Store URL used when `contactHref` yields nothing usable.
  * @returns {string|null} Contact URL built from `contactHref` when possible, otherwise from `storeUrl`.
  */
 function pickCompanyUrl(contactHref, storeUrl) {
     const pageHref = cleanText(contactHref);
     const fromContactPage = pageHref ? buildContactUrl(pageHref) : null;
     return fromContactPage || buildContactUrl(storeUrl);
 }
 /**
  * Derive the contact-info page URL for a 1688 sub-domain.
  *
  * Path, query, fragment and port of the input are discarded; only the protocol
  * and hostname are kept.
  *
  * @param {string} storeUrl - Any absolute URL on the store's host.
  * @returns {string|null} `<protocol>//<hostname>/page/contactinfo.html`, or `null` when the
  *   input is not a parseable URL or its hostname does not end with `.1688.com`.
  */
 function buildContactUrl(storeUrl) {
     let location;
     try {
         location = new URL(storeUrl);
     }
     catch {
         return null;
     }
     const { protocol, hostname } = location;
     return hostname.endsWith('.1688.com')
         ? `${protocol}//${hostname}/page/contactinfo.html`
         : null;
 }
 /**
  * Find the first line that looks like a company name.
  *
  * A line qualifies when, after `cleanText`, it contains one of the markers
  * 有限公司, 商行 or 工厂.
  *
  * @param {string} text - Newline-separated page text.
  * @returns {string|null} The cleaned matching line, or `null` when no line qualifies.
  */
 function firstNamedLine(text) {
     const nameMarkers = ['有限公司', '商行', '工厂'];
     for (const rawLine of text.split('\n')) {
         const candidate = cleanText(rawLine);
         if (nameMarkers.some((marker) => candidate.includes(marker)))
             return candidate;
     }
     return null;
 }
 /**
  * Try several labels in order and return the first metric that `extractMetric` finds.
  *
  * @param {string} text - Text to search.
  * @param {Iterable<string>} labels - Candidate labels, highest priority first.
  * @returns {*} The first truthy `extractMetric(text, label)` result, or `null` if none is truthy.
  */
 function firstMetric(text, labels) {
     let found = null;
     for (const label of labels) {
         // Falsy results are normalised to null so the loop keeps going.
         found = extractMetric(text, label) || null;
         if (found !== null)
             break;
     }
     return found;
 }
 /**
  * Extract the repeat-customer rate ("回头率") from page text.
  *
  * Two layouts are recognised:
  *  - label and percentage on the same line: the matched span is returned through `cleanText`;
  *  - label and percentage separated by a line break: returned as `回头率<percentage>`.
  *
  * @param {string} text - Page text, possibly multi-line.
  * @returns {string|null} The rate text, or `null` when no percentage follows the label.
  */
 function extractReturnRate(text) {
     // Only horizontal whitespace is allowed between label and value here.
     // A plain `\s*` also matches line breaks, which would make the
     // multi-line branch below unreachable.
     const sameLine = text.match(/回头率[^\S\r\n]*([0-9.]+%)/);
     if (sameLine)
         return cleanText(sameLine[0]);
     const acrossLines = text.match(/回头率\s*\n\s*([0-9.]+%)/);
     if (!acrossLines)
         return null;
     return `回头率${cleanText(acrossLines[1])}`;
 }
/**
 * Return the first offer ID that `extractOfferId` can pull out of a list of links.
 *
 * @param {Iterable<string>} links - Candidate URLs, in priority order.
 * @returns {*} The first truthy `extractOfferId(link)` result, or `null` if there is none.
 */
function firstOfferId(links) {
    for (const href of links) {
        const candidateId = extractOfferId(href);
        if (!candidateId)
            continue;
        return candidateId;
    }
    return null;
}
/**
 * Return the contact-info URL for the first link that `buildContactUrl` accepts.
 *
 * @param {Iterable<string>} links - Candidate URLs, in priority order.
 * @returns {string|null} The first truthy `buildContactUrl(link)` result, or `null` if there is none.
 */
function firstContactUrl(links) {
    let resolved = null;
    for (const href of links) {
        resolved = buildContactUrl(href);
        if (resolved)
            break;
    }
    // A falsy leftover from the last iteration is reported as null.
    return resolved || null;
}
/**
 * Navigate to a store-side page and snapshot what this command needs from it.
 *
 * @param {object} page - Browser page handle; must provide `evaluate(script)`.
 * @param {string} url - Page to open.
 * @param {string} action - Label forwarded to `gotoAndReadState` and `assertAuthenticatedState`.
 * @returns {Promise<object>} Result of the in-page script: `href`, `title`, `bodyText`,
 *   `offerLinks` and `contactLinks`.
 */
async function readStorePayload(page, url, action) {
    // 2500 is passed straight through to gotoAndReadState
    // (presumably a settle delay in ms — confirm in shared.js).
    const pageState = await gotoAndReadState(page, url, 2500, action);
    assertAuthenticatedState(pageState, action);
    // The script runs inside the page and collects the visible text plus
    // the anchors that point at offers or at the contact-info page.
    const payload = await page.evaluate(`
    (() => ({
      href: window.location.href,
      title: document.title || '',
      bodyText: document.body ? document.body.innerText || '' : '',
      offerLinks: Array.from(document.querySelectorAll('a[href*="detail.1688.com/offer/"], a[href*="offerId="]'))
        .map((anchor) => anchor.href)
        .filter(Boolean),
      contactLinks: Array.from(document.querySelectorAll('a[href*="contactinfo"]'))
        .map((anchor) => anchor.href)
        .filter(Boolean),
    }))()
  `);
    return payload;
}
/**
 * Open an item detail page and read the seller model embedded in it.
 *
 * @param {object} page - Browser page handle; must provide `evaluate(script)`.
 * @param {string} offerId - Offer whose detail page is opened (via `buildDetailUrl`).
 * @returns {Promise<object>} `{ href, bodyText, seller, services }` as produced by the in-page script.
 * @throws {CommandExecutionError} When the page exposes neither a seller member ID nor a winport URL.
 */
async function readItemSeed(page, offerId) {
    const action = 'store seed item';
    const pageState = await gotoAndReadState(page, buildDetailUrl(offerId), 2500, action);
    assertAuthenticatedState(pageState, action);
    // The in-page script round-trips the model through JSON so only plain data comes back.
    const seed = await page.evaluate(`
    (() => {
      const model = window.context?.result?.global?.globalData?.model ?? null;
      const toJson = (value) => JSON.parse(JSON.stringify(value ?? null));
      return {
        href: window.location.href,
        bodyText: document.body ? document.body.innerText || '' : '',
        seller: toJson(model?.sellerModel),
        services: toJson(model?.shippingServices?.fields?.buyerProtectionModel ?? []),
      };
    })()
  `);
    // A seed is only useful when it identifies the seller in some way.
    if (cleanText(seed?.seller?.memberId) || cleanText(seed?.seller?.winportUrl)) {
        return seed;
    }
    throw new CommandExecutionError('1688 store seed item did not expose seller context', '当前 tab 非商品详情上下文,请切到 detail.1688.com 商品页并重试');
}
/**
 * Tell whether at least one of the collected payloads carries visible text.
 *
 * @param {object|null} storePayload - Store page payload.
 * @param {object|null} contactPayload - Contact page payload.
 * @param {object|null} seed - Item seed payload.
 * @returns {boolean} `true` when any `bodyText` is non-empty after `cleanText`.
 */
function hasAnyEvidence(storePayload, contactPayload, seed) {
    const sources = [storePayload, contactPayload, seed];
    return sources.some((source) => Boolean(cleanText(source?.bodyText)));
}
// Registers the `1688 store` command. The handler visits up to three pages in
// order — store page, contact page, then one item detail page as a "seed" —
// and folds whatever it could read into a single result row.
cli({
    site: '1688',
    name: 'store',
    description: '1688 店铺/供应商公开信息(联系方式、主营、入驻年限、公开服务信号)',
    domain: 'www.1688.com',
    strategy: Strategy.COOKIE,
    navigateBefore: false,
    args: [
        {
            name: 'input',
            required: true,
            positional: true,
            help: '1688 店铺 URL 或 member ID(如 b2b-22154705262941f196)',
        },
    ],
    columns: ['store_name', 'years_on_platform_text', 'location', 'return_rate_text'],
    func: async (page, kwargs) => {
        const rawInput = String(kwargs.input ?? '');
        const resolvedUrl = resolveStoreUrl(rawInput);
        const explicitMemberId = extractMemberId(rawInput);

        // 1) Store page.
        const storePayload = await readStorePayload(page, resolvedUrl, 'store');

        // 2) Contact page: prefer a link found on the store page, otherwise derive
        //    one from the URL the store page ended up on.
        const linkedContactUrl = firstContactUrl(storePayload.contactLinks ?? []);
        const contactUrl = linkedContactUrl || buildContactUrl(storePayload.href || resolvedUrl);
        let contactPayload = null;
        if (contactUrl) {
            contactPayload = await readStorePayload(page, contactUrl, 'store contact');
        }

        // 3) Item seed: take an offer ID from the input, else from either page's links.
        let offerId = extractOfferId(rawInput);
        if (!offerId) {
            offerId = firstOfferId(storePayload.offerLinks ?? []);
        }
        if (!offerId) {
            offerId = firstOfferId(contactPayload?.offerLinks ?? []);
        }
        let seed = null;
        if (offerId) {
            // The seed is optional: a CommandExecutionError means "no seed";
            // every other error still aborts the command.
            seed = await readItemSeed(page, offerId).catch((error) => {
                if (error instanceof CommandExecutionError) {
                    return null;
                }
                throw error;
            });
        }

        if (!hasAnyEvidence(storePayload, contactPayload, seed)) {
            throw new EmptyResultError('1688 store', 'Store page is reachable but no visible fields were extracted. Open the store page in Chrome and retry.');
        }
        const row = normalizeStorePayload({
            resolvedUrl,
            storePayload,
            contactPayload,
            seed,
            explicitMemberId,
        });
        return [row];
    },
});
// Internal helpers re-exported under a single `__test__` binding.
// The name suggests this is meant for unit tests only — not a public API.
export const __test__ = {
    normalizeStorePayload,
    safeCanonicalStoreUrl,
    buildContactUrl,
    firstNamedLine,
    firstMetric,
    extractReturnRate,
    firstOfferId,
    firstContactUrl,
};