article.js
/**
 * 36kr article detail — INTERCEPT strategy.
 *
 * Fetches the title, author, publish date and a body excerpt of a 36kr
 * article given its ID or URL. The data is scraped from the rendered DOM.
 */
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CliError } from '@jackwener/opencli/errors';

/** Maximum number of characters of article body text returned in the `body` row. */
const BODY_MAX_LENGTH = 800;

/**
 * Extract the article ID from a full article URL or a bare numeric ID string.
 *
 * Accepted forms:
 *   - anything containing a `p/<digits>` path segment (e.g. `https://36kr.com/p/123`)
 *   - a string made only of digits (surrounding whitespace is ignored)
 *
 * Any other input yields an empty string so the caller can reject it, instead
 * of scraping stray digits out of unrelated text (for example the "36" in the
 * "36kr.com" host name).
 *
 * @param {string} input - Raw user input: an article URL or a numeric ID.
 * @returns {string} The numeric article ID, or '' when none can be determined.
 */
function parseArticleId(input) {
  const text = String(input ?? '').trim();
  const pathMatch = text.match(/(?:^|\/)p\/(\d+)/);
  if (pathMatch) {
    return pathMatch[1];
  }
  return /^\d+$/.test(text) ? text : '';
}

/**
 * Script evaluated inside the article page. It returns
 * `{ title, author, date, body }`, each a (possibly empty) string.
 *
 * This is a regular template literal, so backslashes meant for the in-page
 * regular expression must be doubled (`\\s`); a single `\s` would be cooked
 * into a plain "s" before the browser ever sees it.
 */
const EXTRACT_ARTICLE_SCRIPT = `
  (() => {
    // Title: element with class "article-title", falling back to the first h1.
    const title = document.querySelector('.article-title, h1')?.textContent?.trim() || '';

    // Author: first .author-name element with non-empty text (some matches are empty links).
    const authorEls = document.querySelectorAll('.author-name');
    const author = Array.from(authorEls).map(el => el.textContent?.trim()).filter(Boolean)[0] || '';

    // Date: text of the .item-time element, minus any leading "·" separators and whitespace.
    const dateRaw = document.querySelector('.item-time')?.textContent?.trim() || '';
    const date = dateRaw.replace(/^[·\\s]+/, '').trim();

    // Body: paragraph texts longer than 10 characters, joined and truncated.
    const bodyEls = document.querySelectorAll('[class*="article-content"] p, [class*="rich-text"] p, .article p');
    const body = Array.from(bodyEls)
      .map(el => el.textContent?.trim())
      .filter(t => t && t.length > 10)
      .join(' ')
      .slice(0, ${BODY_MAX_LENGTH});

    return { title, author, date, body };
  })()
`;

cli({
  site: '36kr',
  name: 'article',
  description: '获取36氪文章正文内容',
  domain: 'www.36kr.com',
  strategy: Strategy.INTERCEPT,
  args: [
    { name: 'id', positional: true, required: true, help: 'Article ID or full 36kr article URL' },
  ],
  columns: ['field', 'value'],
  /**
   * Load the article page and return its details as field/value rows.
   *
   * @param page - Browser page handle supplied by the CLI runtime.
   * @param args - Parsed CLI arguments; `args.id` is the article ID or URL.
   * @returns {Promise<Array<{field: string, value: string}>>} Rows for title,
   *   author, date, url and body; missing optional values are shown as '-'.
   * @throws {CliError} INVALID_ARGUMENT when no article ID can be parsed;
   *   NOT_FOUND when the loaded page exposes no title.
   */
  func: async (page, args) => {
    const articleId = parseArticleId(String(args.id ?? ''));
    if (!articleId) {
      throw new CliError('INVALID_ARGUMENT', 'Invalid article ID or URL');
    }

    // NOTE(review): the intercepted API traffic is never read in this command;
    // all returned data comes from the DOM scrape below.
    await page.installInterceptor('36kr.com/api');
    await page.goto(`https://www.36kr.com/p/${articleId}`);
    // Give the page time to render; presumably seconds — confirm against page.wait().
    await page.wait(5);

    const data = await page.evaluate(EXTRACT_ARTICLE_SCRIPT);
    if (!data?.title) {
      throw new CliError('NOT_FOUND', 'Article not found or failed to load', 'Check the article ID');
    }

    return [
      { field: 'title', value: data.title },
      { field: 'author', value: data.author || '-' },
      { field: 'date', value: data.date || '-' },
      { field: 'url', value: `https://36kr.com/p/${articleId}` },
      { field: 'body', value: data.body || '-' },
    ];
  },
});