hot.js
/**
 * 36kr hot-list — DOM scraping.
 *
 * Navigates to the 36kr hot-list page and scrapes rendered article links.
 * Supports category types: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯).
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CliError } from '@jackwener/opencli/errors';

/** Supported list types, keyed by the URL path segment, with their display names. */
const TYPE_MAP = {
  renqi: '人气榜',
  zonghe: '综合榜',
  shoucang: '收藏榜',
  catalog: '热门资讯',
};

/** Number of items returned when `limit` is missing or unusable. */
const DEFAULT_LIMIT = 20;
/** Hard upper bound on the number of items returned. */
const MAX_LIMIT = 50;
/** How long to wait for article links to show up in the DOM. */
const POLL_TIMEOUT_MS = 5000;
/** Delay between two DOM polls. */
const POLL_INTERVAL_MS = 300;

/**
 * Formats a point in time as the calendar date (YYYY-MM-DD) in Shanghai.
 *
 * @param {Date} [date] - Instant to convert; defaults to now.
 * @returns {string} The Shanghai-local date as `YYYY-MM-DD`.
 */
function getShanghaiDate(date = new Date()) {
  // Shanghai stays on UTC+8 year-round, so a fixed offset is sufficient here
  // and avoids the slow Intl timezone path that timed out on Windows CI.
  const shanghaiOffsetMs = 8 * 60 * 60 * 1000;
  return new Date(date.getTime() + shanghaiOffsetMs).toISOString().slice(0, 10);
}

/**
 * Builds the hot-list page URL for a list type.
 *
 * @param {string} listType - One of the keys of `TYPE_MAP`.
 * @param {Date} [date] - Instant used for the dated lists; defaults to now.
 * @returns {string} The catalog URL for `catalog`, otherwise the dated URL
 *   for page 1 of the given list.
 */
function buildHotListUrl(listType, date = new Date()) {
  if (listType === 'catalog') {
    return 'https://www.36kr.com/hot-list/catalog';
  }
  return `https://www.36kr.com/hot-list/${listType}/${getShanghaiDate(date)}/1`;
}

/**
 * Turns the raw `limit` argument into a usable item count.
 *
 * Non-numeric, zero and negative values fall back to `DEFAULT_LIMIT`; a
 * negative count must never reach `Array.prototype.slice`, where it would
 * drop items from the end of the list instead of limiting it.
 *
 * @param {unknown} rawLimit - The `limit` argument as received from the CLI.
 * @returns {number} A count between 1 and `MAX_LIMIT`.
 */
function normalizeLimit(rawLimit) {
  const requested = Math.trunc(Number(rawLimit));
  if (Number.isNaN(requested) || requested <= 0) {
    return DEFAULT_LIMIT;
  }
  return Math.min(requested, MAX_LIMIT);
}

cli({
  site: '36kr',
  name: 'hot',
  description: '36氪热榜 — trending articles (renqi/zonghe/shoucang/catalog)',
  domain: 'www.36kr.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'limit', type: 'int', default: 20, help: 'Number of items (max 50)' },
    {
      name: 'type',
      type: 'string',
      default: 'catalog',
      help: 'List type: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯)',
    },
  ],
  columns: ['rank', 'title', 'url'],
  /**
   * Opens the requested hot list and returns the ranked article links.
   *
   * @param {object} page - Browser page handle; `goto` and `evaluate` are used.
   * @param {object} args - Parsed CLI arguments (`limit`, `type`).
   * @returns {Promise<Array<{rank: number, title: string, url: string}>>}
   * @throws {CliError} `INVALID_ARGUMENT` for an unknown type, `NO_DATA` when
   *   no article link could be scraped.
   */
  func: async (page, args) => {
    const count = normalizeLimit(args.limit);
    const listType = String(args.type ?? 'catalog');
    const validTypes = Object.keys(TYPE_MAP);
    // Check own keys only: a plain `TYPE_MAP[listType]` lookup is truthy for
    // inherited names such as "constructor" or "toString".
    if (!validTypes.includes(listType)) {
      throw new CliError('INVALID_ARGUMENT', `Unknown type "${listType}". Valid types: ${validTypes.join(', ')}`);
    }

    const url = buildHotListUrl(listType);
    await page.goto(url);

    // Poll DOM until article links appear (36kr renders client-side)
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      if (await page.evaluate('document.querySelectorAll("a[href*=\\"/p/\\"]").length')) {
        break;
      }
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }

    // Scrape rendered article links from DOM (deduplicated by href and title)
    const domItems = await page.evaluate(`
      (() => {
        const seen = new Set();
        const results = [];
        const links = document.querySelectorAll('a[href*="/p/"]');
        for (const el of links) {
          const href = el.getAttribute('href') || '';
          const title = el.textContent?.trim() || '';
          if (!title || title.length < 5 || seen.has(href) || seen.has(title)) continue;
          seen.add(href);
          seen.add(title);
          results.push({ title, url: href.startsWith('http') ? href : 'https://36kr.com' + href });
        }
        return results;
      })()
    `);

    const items = Array.isArray(domItems) ? domItems : [];
    if (items.length === 0) {
      throw new CliError('NO_DATA', 'Could not retrieve 36kr hot list', '36kr may have changed its DOM structure');
    }

    return items.slice(0, count).map((item, index) => ({
      rank: index + 1,
      title: item.title,
      url: item.url,
    }));
  },
});

export { buildHotListUrl, getShanghaiDate };