/ clis / 36kr / hot.js
hot.js
 1  /**
 2   * 36kr hot-list — DOM scraping.
 3   *
 4   * Navigates to the 36kr hot-list page and scrapes rendered article links.
 5   * Supports category types: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯).
 6   */
 7  import { cli, Strategy } from '@jackwener/opencli/registry';
 8  import { CliError } from '@jackwener/opencli/errors';
 9  const TYPE_MAP = {
10      renqi: '人气榜',
11      zonghe: '综合榜',
12      shoucang: '收藏榜',
13      catalog: '热门资讯',
14  };
15  function getShanghaiDate(date = new Date()) {
16      // Shanghai stays on UTC+8 year-round, so a fixed offset is sufficient here
17      // and avoids the slow Intl timezone path that timed out on Windows CI.
18      return new Date(date.getTime() + 8 * 60 * 60 * 1000).toISOString().slice(0, 10);
19  }
/**
 * Builds the 36kr hot-list page URL for a list type.
 *
 * The "catalog" list has a fixed URL; every other type is addressed by the
 * Shanghai calendar date followed by page number 1.
 *
 * @param {string} listType - Hot-list type (e.g. "renqi", "catalog").
 * @param {Date} [date] - Instant used for dated lists; defaults to now.
 * @returns {string} Absolute URL of the hot-list page.
 */
function buildHotListUrl(listType, date = new Date()) {
    const hotListRoot = 'https://www.36kr.com/hot-list';
    const isCatalog = listType === 'catalog';
    // The date is only computed for the dated (non-catalog) lists.
    return isCatalog
        ? `${hotListRoot}/catalog`
        : `${hotListRoot}/${listType}/${getShanghaiDate(date)}/1`;
}
// Registers the `36kr hot` command: opens the requested hot-list page in the
// browser, waits for client-side rendering, and scrapes the article links.
cli({
    site: '36kr',
    name: 'hot',
    description: '36氪热榜 — trending articles (renqi/zonghe/shoucang/catalog)',
    domain: 'www.36kr.com',
    strategy: Strategy.PUBLIC,
    browser: true,
    args: [
        { name: 'limit', type: 'int', default: 20, help: 'Number of items (max 50)' },
        {
            name: 'type',
            type: 'string',
            default: 'catalog',
            help: 'List type: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯)',
        },
    ],
    columns: ['rank', 'title', 'url'],
    /**
     * Scrapes the hot list and returns ranked rows.
     *
     * @param page - Browser page handle; only `goto` and `evaluate` are used.
     * @param args - Parsed CLI arguments (`limit`, `type`).
     * @returns {Promise<Array<{rank: number, title: string, url: string}>>}
     * @throws {CliError} INVALID_ARGUMENT for an unknown type, NO_DATA when
     *   no article links could be scraped.
     */
    func: async (page, args) => {
        const DEFAULT_LIMIT = 20;
        const MAX_LIMIT = 50;
        const POLL_TIMEOUT_MS = 5000;
        const POLL_INTERVAL_MS = 300;

        // Clamp to [1, MAX_LIMIT]. Anything that is not a number >= 1
        // (NaN, 0, negatives) falls back to the default; a negative value
        // must never reach slice(), where it would trim from the tail
        // instead of limiting the result.
        const requested = Number(args.limit);
        const count = requested >= 1
            ? Math.min(Math.floor(requested), MAX_LIMIT)
            : DEFAULT_LIMIT;

        const listType = String(args.type ?? 'catalog');
        // Own-property check: a plain truthiness lookup would also accept
        // inherited keys such as "constructor" or "toString".
        if (!Object.prototype.hasOwnProperty.call(TYPE_MAP, listType)) {
            throw new CliError('INVALID_ARGUMENT', `Unknown type "${listType}". Valid types: ${Object.keys(TYPE_MAP).join(', ')}`);
        }

        const url = buildHotListUrl(listType);
        await page.goto(url);

        // Poll DOM until article links appear (36kr renders client-side).
        // If the deadline passes, fall through: the scrape below will find
        // nothing and the NO_DATA error is raised.
        const deadline = Date.now() + POLL_TIMEOUT_MS;
        while (Date.now() < deadline) {
            if (await page.evaluate('document.querySelectorAll("a[href*=\\"/p/\\"]").length')) {
                break;
            }
            await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
        }

        // Scrape rendered article links from DOM, skipping very short link
        // texts and de-duplicating by both href and title.
        const domItems = await page.evaluate(`
      (() => {
        const seen = new Set();
        const results = [];
        const links = document.querySelectorAll('a[href*="/p/"]');
        for (const el of links) {
          const href = el.getAttribute('href') || '';
          const title = el.textContent?.trim() || '';
          if (!title || title.length < 5 || seen.has(href) || seen.has(title)) continue;
          seen.add(href);
          seen.add(title);
          results.push({ title, url: href.startsWith('http') ? href : 'https://36kr.com' + href });
        }
        return results;
      })()
    `);

        // Guard against a non-array evaluate result before slicing.
        const items = Array.isArray(domItems) ? domItems : [];
        if (items.length === 0) {
            throw new CliError('NO_DATA', 'Could not retrieve 36kr hot list', '36kr may have changed its DOM structure');
        }

        return items.slice(0, count).map((item, i) => ({
            rank: i + 1,
            title: item.title,
            url: item.url,
        }));
    },
});
86  export { buildHotListUrl, getShanghaiDate };