search.js
/**
 * 36kr article search — DOM scraping.
 *
 * Navigates to the 36kr search results page and scrapes rendered articles.
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CliError } from '@jackwener/opencli/errors';

/** Number of results returned when `limit` is missing or not a positive number. */
const DEFAULT_LIMIT = 20;
/** Hard upper bound on the number of results returned. */
const MAX_LIMIT = 50;
/** How long to wait for client-side rendered article links to appear. */
const POLL_TIMEOUT_MS = 5000;
/** Delay between two DOM polls while waiting for the results to render. */
const POLL_INTERVAL_MS = 300;

/** Page-side expression: number of article links (`/p/<id>`) currently in the DOM. */
const ARTICLE_LINK_COUNT_SCRIPT = `document.querySelectorAll('a[href*="/p/"]').length`;

/**
 * Page-side script that collects `{ title, url, date }` records.
 *
 * It first reads the dedicated title links (`.article-item-title`); only when
 * none are found does it fall back to any `/p/` link with a reasonably long text.
 */
const SCRAPE_SCRIPT = `
  (() => {
    const ORIGIN = 'https://36kr.com';
    const DATE_SELECTOR = '[class*="time"], [class*="date"], time';
    const MAX_CLIMB = 4;
    const seen = new Set();
    const results = [];

    // Resolve relative and protocol-relative hrefs against the site origin.
    const toAbsolute = (href) => {
      try {
        return new URL(href, ORIGIN).href;
      } catch {
        return href.startsWith('http') ? href : ORIGIN + href;
      }
    };

    // closest() starts at the element itself, so for a title link it resolves to
    // the title node, which holds no date. Try that container first, then climb
    // a few ancestors, stopping at any container that holds more than one title
    // because its date could belong to a different article.
    const findDate = (el) => {
      let node = el.closest('[class*="article-item"]') || el.parentElement;
      for (let depth = 0; node && depth <= MAX_CLIMB; depth += 1) {
        if (depth > 0 && node.querySelectorAll('.article-item-title').length > 1) break;
        const dateEl = node.querySelector(DATE_SELECTOR);
        if (dateEl) return dateEl.textContent?.trim() || '';
        node = node.parentElement;
      }
      return '';
    };

    // article-item-title contains the clickable title link
    const titleEls = document.querySelectorAll(
      '.article-item-title a[href*="/p/"], .article-item-title[href*="/p/"]'
    );
    for (const el of titleEls) {
      const href = el.getAttribute('href') || '';
      const title = el.textContent?.trim() || '';
      if (!title || seen.has(href)) continue;
      seen.add(href);
      results.push({ title, url: toAbsolute(href), date: findDate(el) });
    }

    // Fallback: generic /p/ links with meaningful text
    if (results.length === 0) {
      for (const el of document.querySelectorAll('a[href*="/p/"]')) {
        const href = el.getAttribute('href') || '';
        const title = el.textContent?.trim() || '';
        if (!title || title.length < 8 || seen.has(href) || seen.has(title)) continue;
        seen.add(href);
        seen.add(title);
        results.push({ title, url: toAbsolute(href), date: '' });
      }
    }
    return results;
  })()
`;

/**
 * Turns the raw `limit` argument into a usable result count.
 *
 * Anything that is not a positive number (missing, NaN, zero, negative, or a
 * fraction below 1) falls back to DEFAULT_LIMIT, so a negative value can no
 * longer reach `slice` and trim results from the end. Values above MAX_LIMIT
 * are capped.
 */
const resolveLimit = (rawLimit) => {
  const requested = Math.trunc(Number(rawLimit));
  if (!Number.isFinite(requested) || requested <= 0) return DEFAULT_LIMIT;
  return Math.min(requested, MAX_LIMIT);
};

cli({
  site: '36kr',
  name: 'search',
  description: '搜索36氪文章',
  domain: 'www.36kr.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: 'Search keyword (e.g. "AI", "OpenAI")' },
    { name: 'limit', type: 'int', default: 20, help: 'Number of results (max 50)' },
  ],
  columns: ['rank', 'title', 'date', 'url'],
  /**
   * Runs the search in the browser page and returns ranked rows.
   *
   * @param page Browser page handle; only `goto` and `evaluate` are used here.
   * @param args Parsed CLI arguments (`query`, `limit`).
   * @returns Rows of `{ rank, title, date, url }`, at most `limit` (capped at 50).
   * @throws {CliError} `NO_DATA` when no article could be scraped.
   */
  func: async (page, args) => {
    const count = resolveLimit(args.limit);
    const query = encodeURIComponent(String(args.query ?? ''));
    await page.goto(`https://www.36kr.com/search/articles/${query}`);

    // Poll DOM until article links appear (36kr renders client-side).
    // If the deadline passes we still attempt the scrape once below.
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      if (await page.evaluate(ARTICLE_LINK_COUNT_SCRIPT)) break;
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }

    const domItems = await page.evaluate(SCRAPE_SCRIPT);
    const items = Array.isArray(domItems) ? domItems : [];
    if (items.length === 0) {
      throw new CliError('NO_DATA', 'No results found', 'Try a different query or check your keyword');
    }
    return items.slice(0, count).map((item, i) => ({
      rank: i + 1,
      title: item.title,
      date: item.date,
      url: item.url,
    }));
  },
});