/ clis / tieba / posts.js
posts.js
 1  import { EmptyResultError } from '@jackwener/opencli/errors';
 2  import { cli, Strategy } from '@jackwener/opencli/registry';
 3  import { buildTiebaPostCardsFromPagePc, buildTiebaPostItems, normalizeTiebaLimit, signTiebaPcParams, } from './utils.js';
 4  function getForumPageNumber(kwargs) {
 5      return Math.max(1, Number(kwargs.page || 1));
 6  }
 7  function getForumUrl(kwargs) {
 8      const forum = String(kwargs.forum || '');
 9      return `https://tieba.baidu.com/f?kw=${encodeURIComponent(forum)}&ie=utf-8&pn=${(getForumPageNumber(kwargs) - 1) * 50}`;
10  }
/**
 * Assemble the form fields of the page_pc forum-list request (before signing),
 * so the listing can be rebuilt from the API instead of scraping only the
 * visible thread cards.
 *
 * @param {{ forum?: string, page?: number | string }} kwargs - Parsed CLI arguments.
 * @param {number} limit - Number of items the caller wants back.
 * @returns {Record<string, string>} Request fields, all as strings.
 */
function buildTiebaPagePcParams(kwargs, limit) {
    const forumName = String(kwargs.forum || '');
    const pageNumber = getForumPageNumber(kwargs);
    // Request ten more entries than asked for, clamped to the 10..30 range.
    const neededCount = Math.min(Math.max(limit + 10, 10), 30);
    return {
        // NOTE(review): kw is percent-encoded here and form-encoded again by the
        // caller; presumably the signed endpoint expects that — confirm.
        kw: encodeURIComponent(forumName),
        pn: String(pageNumber),
        sort_type: '-1',
        is_newfrs: '1',
        is_newfeed: '1',
        rn: '30',
        rn_need: String(neededCount),
        tbs: '',
        subapp_type: 'pc',
        _client_type: '20',
    };
}
/**
 * Fetch the forum thread list through the signed page_pc endpoint.
 *
 * Tieba expects the signed forum-list request to be replayed with the
 * browser's cookies, so the forum page is opened first and its cookies are
 * forwarded on the POST request.
 *
 * @param {object} page - Browser page handle; must provide `goto`, `wait` and `getCookies`.
 * @param {{ forum?: string, page?: number | string }} kwargs - Parsed CLI arguments.
 * @param {number} limit - Number of items the caller wants back.
 * @returns {Promise<object>} The parsed JSON payload, or `{}` when the body is
 *   not valid JSON or is not a JSON object/array.
 */
async function fetchTiebaPagePc(page, kwargs, limit) {
    // The same URL is used for navigation and as the referer header.
    const forumUrl = getForumUrl(kwargs);
    await page.goto(forumUrl, { waitUntil: 'none' });
    // Give the page a moment to settle (presumably seconds — confirm against the page API).
    await page.wait(2);
    const params = buildTiebaPagePcParams(kwargs, limit);
    const cookies = await page.getCookies({ domain: 'tieba.baidu.com' });
    const cookieHeader = cookies.map((item) => `${item.name}=${item.value}`).join('; ');
    const body = new URLSearchParams({
        ...params,
        sign: signTiebaPcParams(params),
    }).toString();
    const response = await fetch('https://tieba.baidu.com/c/f/frs/page_pc', {
        method: 'POST',
        headers: {
            'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
            cookie: cookieHeader,
            'x-requested-with': 'XMLHttpRequest',
            referer: forumUrl,
            'user-agent': 'Mozilla/5.0',
        },
        body,
    });
    const text = await response.text();
    try {
        const parsed = JSON.parse(text);
        // JSON.parse may yield null or a primitive; callers read properties off
        // the payload, so anything that is not an object becomes an empty one.
        return parsed !== null && typeof parsed === 'object' ? parsed : {};
    }
    catch {
        // Best effort: a non-JSON body (e.g. an HTML block page) is reported as
        // an empty payload so the caller can raise its own empty-result error.
        return {};
    }
}
// Register the `tieba posts` command: lists threads of a forum via the signed
// page_pc API, replayed with the browser session's cookies.
cli({
    site: 'tieba',
    name: 'posts',
    description: 'Browse posts in a tieba forum',
    domain: 'tieba.baidu.com',
    strategy: Strategy.COOKIE,
    browser: true,
    // The handler performs its own navigation inside fetchTiebaPagePc.
    navigateBefore: false,
    args: [
        { name: 'forum', positional: true, required: true, type: 'string', help: 'Forum name in Chinese' },
        { name: 'page', type: 'int', default: 1, help: 'Page number' },
        { name: 'limit', type: 'int', default: 20, help: 'Number of items to return' },
    ],
    columns: ['rank', 'title', 'author', 'replies'],
    func: async (page, kwargs) => {
        const limit = normalizeTiebaLimit(kwargs.limit);
        const payload = await fetchTiebaPagePc(page, kwargs, limit);
        // Anything other than an array feed list is treated as "no feeds".
        const feedList = payload.page_data?.feed_list;
        const cards = buildTiebaPostCardsFromPagePc(Array.isArray(feedList) ? feedList : []);
        const items = buildTiebaPostItems(cards, limit);
        // Success requires at least one item and no truthy error_code in the payload.
        if (items.length > 0 && !payload.error_code) {
            return items;
        }
        throw new EmptyResultError('tieba posts', 'Tieba may have blocked the forum page, or the DOM structure may have changed');
    },
});