/ clis / 36kr / article.js
article.js
 1  /**
 2   * 36kr article detail — INTERCEPT strategy.
 3   *
 4   * Fetches the title, author, publish date and a body excerpt (first 800 characters) of a 36kr article, given its ID or URL.
 5   */
 6  import { cli, Strategy } from '@jackwener/opencli/registry';
 7  import { CliError } from '@jackwener/opencli/errors';
 8  /** Extract article ID from a full URL or a bare numeric ID string */
 9  function parseArticleId(input) {
10      const m = input.match(/\/p\/(\d+)/);
11      return m ? m[1] : input.replace(/\D/g, '');
12  }
// Registers the `36kr article` command: opens the article page and scrapes
// title, author, publish date and a body excerpt from the rendered DOM.
cli({
    site: '36kr',
    name: 'article',
    description: '获取36氪文章正文内容',
    domain: 'www.36kr.com',
    strategy: Strategy.INTERCEPT,
    args: [
        { name: 'id', positional: true, required: true, help: 'Article ID or full 36kr article URL' },
    ],
    columns: ['field', 'value'],
    /**
     * Load the article page and return its details as field/value rows.
     *
     * @param page - Browser page handle supplied by the CLI runtime.
     * @param args - Parsed CLI arguments; `args.id` is an article ID or URL.
     * @returns Rows for title, author, date, url and body; missing author,
     *   date or body are reported as '-'. The body is capped at 800 characters.
     * @throws {CliError} INVALID_ARGUMENT when no article ID can be parsed,
     *   NOT_FOUND when the loaded page yields no title.
     */
    func: async (page, args) => {
        const articleId = parseArticleId(String(args.id ?? ''));
        if (!articleId) {
            throw new CliError('INVALID_ARGUMENT', 'Invalid article ID or URL');
        }
        // NOTE(review): the interceptor is installed but nothing captured by it
        // is read below; all fields come from the DOM. Confirm whether it is
        // still required by the INTERCEPT strategy.
        await page.installInterceptor('36kr.com/api');
        await page.goto(`https://www.36kr.com/p/${articleId}`);
        // Fixed pause to let the page render (presumably seconds — TODO confirm).
        await page.wait(5);
        // String.raw keeps the backslash of the "\s" in the date regex: in an
        // ordinary template literal it is cooked to a plain "s" before the
        // script ever reaches the page.
        const data = await page.evaluate(String.raw`
      (() => {
        const textOf = (el) => el?.textContent?.trim() || '';
        // Title: prefer the dedicated .article-title element and fall back to the
        // first h1. A combined selector list would return whichever of the two
        // appears first in the document.
        const title = textOf(document.querySelector('.article-title')) || textOf(document.querySelector('h1'));
        // Author: first .author-name with non-empty text (per the original note,
        // an empty nav link can precede the element holding the real name)
        const author = Array.from(document.querySelectorAll('.author-name')).map((el) => textOf(el)).filter(Boolean)[0] || '';
        // Date: 36kr uses class "title-icon-item item-time" for the publish date;
        // drop any leading "·" separators and whitespace
        const dateRaw = textOf(document.querySelector('.item-time'));
        const date = dateRaw.replace(/^[·\s]+/, '').trim();
        // Article body: paragraphs longer than 10 characters, joined with spaces
        // and truncated to the first 800 characters
        const bodyEls = document.querySelectorAll('[class*="article-content"] p, [class*="rich-text"] p, .article p');
        const body = Array.from(bodyEls)
          .map((el) => textOf(el))
          .filter((t) => t.length > 10)
          .join(' ')
          .slice(0, 800);
        return { title, author, date, body };
      })()
    `);
        // No title means the page did not render an article.
        if (!data?.title) {
            throw new CliError('NOT_FOUND', 'Article not found or failed to load', 'Check the article ID');
        }
        return [
            { field: 'title', value: data.title },
            { field: 'author', value: data.author || '-' },
            { field: 'date', value: data.date || '-' },
            { field: 'url', value: `https://36kr.com/p/${articleId}` },
            { field: 'body', value: data.body || '-' },
        ];
    },
});