posts.js
import { EmptyResultError } from '@jackwener/opencli/errors';
import { cli, Strategy } from '@jackwener/opencli/registry';
import {
    buildTiebaPostCardsFromPagePc,
    buildTiebaPostItems,
    normalizeTiebaLimit,
    signTiebaPcParams,
} from './utils.js';

/**
 * Resolve the 1-based forum page number from the command arguments.
 *
 * A missing, zero, or non-numeric `page` falls back to 1 so that the value
 * never becomes `NaN` (which would otherwise leak into the forum URL and the
 * signed request as the literal text "NaN"). Fractional values are truncated.
 *
 * @param {{ page?: number | string }} kwargs - Parsed command arguments.
 * @returns {number} An integer page number, always >= 1.
 */
function getForumPageNumber(kwargs) {
    const requested = Number(kwargs.page || 1);
    if (!Number.isFinite(requested)) {
        return 1;
    }
    return Math.max(1, Math.trunc(requested));
}

/**
 * Build the forum listing URL for the requested forum and page.
 *
 * The `pn` query parameter is an offset: (page - 1) * 50.
 *
 * @param {{ forum?: string, page?: number | string }} kwargs - Parsed command arguments.
 * @returns {string} The forum page URL.
 */
function getForumUrl(kwargs) {
    const forum = String(kwargs.forum || '');
    const offset = (getForumPageNumber(kwargs) - 1) * 50;
    return `https://tieba.baidu.com/f?kw=${encodeURIComponent(forum)}&ie=utf-8&pn=${offset}`;
}

/**
 * Rebuild the signed page_pc request instead of scraping only the visible thread cards.
 *
 * @param {{ forum?: string, page?: number | string }} kwargs - Parsed command arguments.
 * @param {number} limit - Number of items the caller wants back.
 * @returns {Record<string, string>} Form parameters (all string values), without the signature.
 */
function buildTiebaPagePcParams(kwargs, limit) {
    return {
        // NOTE(review): kw is percent-encoded here and URLSearchParams encodes it
        // again when the body is built. Presumably the signature is computed over
        // this pre-encoded value — confirm before "fixing" the double encoding.
        kw: encodeURIComponent(String(kwargs.forum || '')),
        // Unlike the forum URL, this endpoint receives the page number, not an offset.
        pn: String(getForumPageNumber(kwargs)),
        sort_type: '-1',
        is_newfrs: '1',
        is_newfeed: '1',
        rn: '30',
        // Ask for limit + 10 entries, clamped to the range [10, 30].
        rn_need: String(Math.min(Math.max(limit + 10, 10), 30)),
        tbs: '',
        subapp_type: 'pc',
        _client_type: '20',
    };
}

/**
 * Tieba expects the signed forum-list request to be replayed with the browser's cookies.
 *
 * Opens the forum page in the browser, reads the tieba.baidu.com cookies, then
 * POSTs the signed form to the page_pc endpoint with those cookies attached.
 *
 * @param {object} page - Browser page handle providing `goto`, `wait` and `getCookies`.
 * @param {{ forum?: string, page?: number | string }} kwargs - Parsed command arguments.
 * @param {number} limit - Number of items the caller wants back.
 * @returns {Promise<object>} The parsed JSON payload, or `{}` when the response
 *   body is not a JSON object (the caller reports that as an empty result).
 */
async function fetchTiebaPagePc(page, kwargs, limit) {
    const forumUrl = getForumUrl(kwargs);
    await page.goto(forumUrl, { waitUntil: 'none' });
    // Navigation does not wait for load, so pause before reading cookies
    // (presumably 2 seconds — confirm the unit of page.wait).
    await page.wait(2);
    const params = buildTiebaPagePcParams(kwargs, limit);
    const cookies = await page.getCookies({ domain: 'tieba.baidu.com' });
    const cookieHeader = cookies.map((item) => `${item.name}=${item.value}`).join('; ');
    const body = new URLSearchParams({
        ...params,
        sign: signTiebaPcParams(params),
    }).toString();
    const response = await fetch('https://tieba.baidu.com/c/f/frs/page_pc', {
        method: 'POST',
        headers: {
            'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
            cookie: cookieHeader,
            'x-requested-with': 'XMLHttpRequest',
            referer: forumUrl,
            'user-agent': 'Mozilla/5.0',
        },
        body,
    });
    const text = await response.text();
    let parsed;
    try {
        parsed = JSON.parse(text);
    }
    catch {
        // Deliberate best effort: a non-JSON body (e.g. an HTML block page) is
        // treated as an empty payload rather than a crash.
        return {};
    }
    // JSON.parse can legally yield null or a primitive; callers dereference the
    // result as an object, so normalize anything else to an empty payload.
    return parsed !== null && typeof parsed === 'object' ? parsed : {};
}

cli({
    site: 'tieba',
    name: 'posts',
    description: 'Browse posts in a tieba forum',
    domain: 'tieba.baidu.com',
    strategy: Strategy.COOKIE,
    browser: true,
    navigateBefore: false,
    args: [
        { name: 'forum', positional: true, required: true, type: 'string', help: 'Forum name in Chinese' },
        { name: 'page', type: 'int', default: 1, help: 'Page number' },
        { name: 'limit', type: 'int', default: 20, help: 'Number of items to return' },
    ],
    columns: ['rank', 'title', 'author', 'replies'],
    /**
     * Fetch one page of forum threads and shape them into result rows.
     *
     * @throws {EmptyResultError} When no items could be built or the payload
     *   carries a non-zero error code.
     */
    func: async (page, kwargs) => {
        const limit = normalizeTiebaLimit(kwargs.limit);
        const payload = await fetchTiebaPagePc(page, kwargs, limit);
        const rawFeeds = Array.isArray(payload.page_data?.feed_list) ? payload.page_data.feed_list : [];
        const rawCards = buildTiebaPostCardsFromPagePc(rawFeeds);
        const items = buildTiebaPostItems(rawCards, limit);
        // Accept both a numeric 0 and the string "0" as success; any other
        // truthy error_code is a failure.
        const errorCode = payload.error_code;
        const hasError = Boolean(errorCode) && String(errorCode) !== '0';
        if (!items.length || hasError) {
            throw new EmptyResultError('tieba posts', 'Tieba may have blocked the forum page, or the DOM structure may have changed');
        }
        return items;
    },
});