// utils.js
import { createHash } from 'node:crypto';

/**
 * Shared Tieba parsing helpers used by the browser adapters.
 */

/** Upper bound applied to list-style limits by `normalizeTiebaLimit`. */
export const MAX_TIEBA_LIMIT = 20;

/** Salt appended to the sorted `key=value` payload before it is MD5-hashed. */
const TIEBA_PC_SIGN_SALT = '36770b1f34c9bbf2e7d1a99d2b82fa9e';
/** Time zone used when rendering Unix timestamps. */
const TIEBA_TIME_ZONE = 'Asia/Shanghai';
/** Prefix of the canonical thread URL built from a thread id. */
const TIEBA_THREAD_URL_PREFIX = 'https://tieba.baidu.com/p/';
/** Reply limit used by `buildTiebaReadItems` when no usable limit is supplied. */
const DEFAULT_TIEBA_READ_LIMIT = 30;
/** Maximum number of UTF-16 code units kept from a search snippet. */
const MAX_SNIPPET_LENGTH = 200;
/** Multiplier for counts suffixed with `万` or `W`. */
const COUNT_UNIT_MULTIPLIER = 10000;
/** Extracts the numeric thread id from a `/p/<id>` URL path. */
const THREAD_ID_IN_URL = /\/p\/(\d+)/;

/** Lazily created formatter; building an Intl.DateTimeFormat per call is wasteful. */
let tiebaTimeFormatter;

/**
 * Keep the public CLI limit contract aligned with the real implementation.
 *
 * @param {unknown} value - Requested limit; `null`/`undefined` use `fallback`.
 * @param {number} [fallback=MAX_TIEBA_LIMIT] - Returned as-is (not clamped) when
 *   `value` is not a finite number or is below 1.
 * @returns {number} The truncated limit, capped at `MAX_TIEBA_LIMIT`, or `fallback`.
 */
export function normalizeTiebaLimit(value, fallback = MAX_TIEBA_LIMIT) {
  const parsed = Number(value ?? fallback);
  if (!Number.isFinite(parsed) || parsed < 1) {
    return fallback;
  }
  return Math.min(Math.trunc(parsed), MAX_TIEBA_LIMIT);
}

/**
 * Collapse whitespace runs to single spaces and trim the result.
 *
 * @param {unknown} value - Any value; only strings are accepted.
 * @returns {string} The normalized string, or `''` for non-string input.
 */
export function normalizeText(value) {
  return typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '';
}

/**
 * Like `normalizeText`, but also accepts non-negative safe integers so numeric
 * ids and timestamps are not silently dropped.
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeScalar(value) {
  if (typeof value === 'number') {
    return Number.isSafeInteger(value) && value >= 0 ? String(value) : '';
  }
  return normalizeText(value);
}

/**
 * Convert a value to a finite number, treating `null`, `undefined` and blank
 * strings as "missing" rather than as 0.
 *
 * @param {unknown} value
 * @returns {number | undefined} The finite number, or `undefined` when missing/invalid.
 */
function toFiniteNumber(value) {
  if (value === null || value === undefined) {
    return undefined;
  }
  if (typeof value === 'string' && value.trim() === '') {
    return undefined;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : undefined;
}

/**
 * Keep only the non-null object entries of an array; anything else yields `[]`.
 *
 * @param {unknown} value
 * @returns {object[]}
 */
function toRecordList(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((entry) => entry !== null && typeof entry === 'object');
}

/**
 * Cut a string to at most `maxLength` UTF-16 code units without splitting a
 * surrogate pair.
 *
 * @param {string} text
 * @param {number} maxLength
 * @returns {string}
 */
function truncateText(text, maxLength) {
  if (text.length <= maxLength) {
    return text;
  }
  const lastUnit = text.charCodeAt(maxLength - 1);
  // A high surrogate at the cut point would lose its low half; drop it as well.
  const isHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return text.slice(0, isHighSurrogate ? maxLength - 1 : maxLength);
}

/**
 * Order strings by UTF-16 code units, independent of the host locale.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number}
 */
function compareCodeUnits(left, right) {
  if (left < right) {
    return -1;
  }
  return left > right ? 1 : 0;
}

/**
 * Match Tieba PC's signed request contract so forum list fetching stays stable.
 *
 * Keys are sorted by code unit rather than with `localeCompare`, so the
 * signature does not vary with the locale of the machine running the CLI.
 *
 * @param {Record<string, unknown>} params - Request parameters to sign.
 * @returns {string} Hex MD5 of the sorted `key=value` pairs followed by the salt.
 */
export function signTiebaPcParams(params) {
  const pairs = Object.keys(params)
    .sort(compareCodeUnits)
    .map((key) => `${key}=${params[key]}`);
  const payload = `${pairs.join('')}${TIEBA_PC_SIGN_SALT}`;
  return createHash('md5').update(payload).digest('hex');
}

/**
 * Parse a display count such as `"评论 12"`, `"1.2万"` or `"3w"`.
 *
 * @param {unknown} text - Raw label text.
 * @returns {number} The parsed count, or 0 when no number can be extracted.
 */
export function parseTiebaCount(text) {
  const value = normalizeText(text).toUpperCase();
  if (!value) {
    return 0;
  }
  const compact = value.replace(/[^\d.W万]/g, '');
  if (compact.endsWith('万') || compact.endsWith('W')) {
    const scaled = Math.round(Number.parseFloat(compact.slice(0, -1)) * COUNT_UNIT_MULTIPLIER);
    // A bare unit with no digits parses to NaN; report it as 0 instead.
    return Number.isFinite(scaled) ? scaled : 0;
  }
  return Number.parseInt(compact.replace(/[^\d]/g, ''), 10) || 0;
}

/**
 * Extract the time portion of a "last reply" label.
 *
 * @param {unknown} text - Label text, optionally prefixed with `回复于`.
 * @returns {string} The first recognised relative/absolute time, or the
 *   normalized text when nothing matches.
 */
export function parseTiebaLastReply(text) {
  const normalized = normalizeText(text).replace(/^回复于/, '').trim();
  const match = normalized.match(/(刚刚|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/);
  return match ? match[1].trim() : normalized;
}

/**
 * Prefer an explicit URL; otherwise build the canonical thread URL from the id.
 *
 * @param {string} id - Thread id, may be empty.
 * @param {unknown} [rawUrl] - Explicit URL, if any.
 * @returns {string} The URL, or `''` when neither input is usable.
 */
function buildTiebaThreadUrl(id, rawUrl) {
  const explicitUrl = normalizeText(rawUrl);
  if (explicitUrl) {
    return explicitUrl;
  }
  return id ? `${TIEBA_THREAD_URL_PREFIX}${id}` : '';
}

/**
 * Resolve a card's thread id from `threadId`, falling back to its `/p/<id>` URL.
 *
 * @param {{ threadId?: unknown, url?: unknown }} raw
 * @returns {string}
 */
function resolveTiebaThreadId(raw) {
  const direct = normalizeScalar(raw.threadId);
  if (direct) {
    return direct;
  }
  const fromUrl = normalizeText(raw.url).match(THREAD_ID_IN_URL);
  return fromUrl ? fromUrl[1] : '';
}

/**
 * Find the feed component called `name` and return its same-named payload object.
 *
 * @param {any} feed
 * @param {string} name
 * @returns {Record<string, any>} The payload, or `{}` when absent or not an object.
 */
function getTiebaFeedComponent(feed, name) {
  const components = Array.isArray(feed?.components) ? feed.components : [];
  const match = components.find((entry) => normalizeText(entry?.component) === name);
  if (!match) {
    return {};
  }
  const payload = match[name];
  return payload && typeof payload === 'object' ? payload : {};
}

/**
 * Return the first non-empty `text.text` in the `feed_head` component's `main_data`.
 *
 * @param {any} feed
 * @returns {string}
 */
function extractTiebaFeedAuthor(feed) {
  const head = getTiebaFeedComponent(feed, 'feed_head');
  const mainData = Array.isArray(head.main_data) ? head.main_data : [];
  for (const item of mainData) {
    const author = normalizeText(item?.text?.text);
    if (author) {
      return author;
    }
  }
  return '';
}

/**
 * Read the title from the `feed_title` component, falling back to
 * `business_info_map.title`.
 *
 * @param {any} feed
 * @returns {string}
 */
function extractTiebaFeedTitle(feed) {
  const title = getTiebaFeedComponent(feed, 'feed_title');
  const titleData = Array.isArray(title.data) ? title.data : [];
  const fromComponent = normalizeText(titleData[0]?.text_info?.text);
  return fromComponent || normalizeText(feed?.business_info_map?.title);
}

/**
 * Read the comment count from `feed_social`, then `business_info_map`, else 0.
 *
 * @param {any} feed
 * @returns {number}
 */
function extractTiebaFeedCommentCount(feed) {
  const social = getTiebaFeedComponent(feed, 'feed_social');
  return toFiniteNumber(social.comment_num)
    ?? toFiniteNumber(feed?.business_info_map?.comment_num)
    ?? 0;
}

/**
 * Resolve the thread id from `business_info_map.thread_id`, the `tid` log
 * parameter, or a `tid=` query parameter in `schema`, in that order.
 *
 * @param {any} feed
 * @returns {string}
 */
function extractTiebaFeedThreadId(feed) {
  const direct = normalizeScalar(feed?.business_info_map?.thread_id);
  if (direct) {
    return direct;
  }
  const logParams = Array.isArray(feed?.log_param) ? feed.log_param : [];
  const tidParam = logParams.find((item) => normalizeText(item?.key) === 'tid');
  const fromLog = normalizeScalar(tidParam?.value);
  if (fromLog) {
    return fromLog;
  }
  const fromSchema = normalizeText(feed?.schema).match(/[?&]tid=(\d+)/);
  return fromSchema ? fromSchema[1] : '';
}

/**
 * Build the "last reply" label from the first `extra_data` entry of `feed_head`.
 * All-digit text is formatted as a Unix timestamp in seconds.
 *
 * @param {any} feed
 * @returns {string} The time prefix immediately followed by the time text.
 */
function extractTiebaFeedLastReply(feed) {
  const head = getTiebaFeedComponent(feed, 'feed_head');
  const extraData = Array.isArray(head.extra_data) ? head.extra_data : [];
  const first = extraData[0];
  const prefix = normalizeText(first?.business_info_map?.time_prefix);
  const rawTime = normalizeScalar(first?.text?.text);
  const formattedTime = /^\d+$/.test(rawTime) ? formatTiebaUnixTime(rawTime) : rawTime;
  return [prefix, formattedTime].filter(Boolean).join('');
}

/**
 * Convert Tieba's signed `page_pc` feed entries into the stable card shape used by the CLI.
 *
 * @param {unknown} rawFeeds - Feed entries; non-array input and null entries are ignored.
 * @returns {Array<{ title: string, author: string, descInfo: string,
 *   commentCount: number, actionTexts: string[], threadId: string, url: string }>}
 *   One card per `layout === 'feed'` entry that has a non-empty title.
 */
export function buildTiebaPostCardsFromPagePc(rawFeeds) {
  return toRecordList(rawFeeds)
    .filter((entry) => normalizeText(entry.layout) === 'feed' && entry.feed)
    .map(({ feed }) => {
      const threadId = extractTiebaFeedThreadId(feed);
      return {
        title: extractTiebaFeedTitle(feed),
        author: extractTiebaFeedAuthor(feed),
        descInfo: extractTiebaFeedLastReply(feed),
        commentCount: extractTiebaFeedCommentCount(feed),
        actionTexts: [],
        threadId,
        url: buildTiebaThreadUrl(threadId),
      };
    })
    .filter((card) => card.title);
}

/**
 * Turn raw post cards into ranked list rows.
 *
 * @param {unknown} rawCards - Cards from DOM scraping or `buildTiebaPostCardsFromPagePc`.
 * @param {unknown} requestedLimit - Passed through `normalizeTiebaLimit`.
 * @returns {Array<{ rank: number, title: string, author: string, replies: number,
 *   last_reply: string, id: string, url: string }>} Titled rows, ranked from 1.
 */
export function buildTiebaPostItems(rawCards, requestedLimit) {
  const limit = normalizeTiebaLimit(requestedLimit);
  return toRecordList(rawCards)
    .map((raw) => {
      const id = resolveTiebaThreadId(raw);
      const actionTexts = Array.isArray(raw.actionTexts)
        ? raw.actionTexts.map((text) => normalizeText(text)).filter(Boolean)
        : [];
      const commentText = actionTexts.find((text) => /评论/.test(text))
        || actionTexts[actionTexts.length - 1]
        || '';
      // A null/blank commentCount means "unknown", so fall back to the action text.
      const explicitCount = toFiniteNumber(raw.commentCount);
      return {
        title: normalizeText(raw.title),
        author: normalizeText(raw.author),
        replies: explicitCount ?? parseTiebaCount(commentText),
        last_reply: parseTiebaLastReply(String(raw.descInfo ?? '')),
        id,
        url: buildTiebaThreadUrl(id, raw.url),
      };
    })
    .filter((item) => item.title)
    .slice(0, limit)
    .map((item, index) => ({ rank: index + 1, ...item }));
}

/**
 * Turn raw search hits into ranked list rows.
 *
 * @param {unknown} rawItems - Raw search results.
 * @param {unknown} requestedLimit - Passed through `normalizeTiebaLimit`.
 * @returns {Array<{ rank: number, title: string, forum: string, author: string,
 *   time: string, snippet: string, id: string, url: string }>} Titled rows, ranked from 1.
 */
export function buildTiebaSearchItems(rawItems, requestedLimit) {
  const limit = normalizeTiebaLimit(requestedLimit);
  return toRecordList(rawItems)
    .map((raw) => {
      const url = normalizeText(raw.url);
      const idFromUrl = url.match(THREAD_ID_IN_URL)?.[1] || '';
      return {
        title: normalizeText(raw.title),
        forum: normalizeText(raw.forum),
        author: normalizeText(raw.author),
        time: normalizeText(raw.time),
        snippet: truncateText(normalizeText(raw.snippet), MAX_SNIPPET_LENGTH),
        id: normalizeScalar(raw.id) || idFromUrl,
        url,
      };
    })
    .filter((item) => item.title)
    .slice(0, limit)
    .map((item, index) => ({ rank: index + 1, ...item }));
}

/**
 * Return the shared `YYYY-MM-DD HH:mm` formatter, creating it on first use.
 *
 * @returns {Intl.DateTimeFormat}
 */
function getTiebaTimeFormatter() {
  if (!tiebaTimeFormatter) {
    tiebaTimeFormatter = new Intl.DateTimeFormat('sv-SE', {
      timeZone: TIEBA_TIME_ZONE,
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
      hour: '2-digit',
      minute: '2-digit',
      // Explicit 0-23 cycle so midnight is never rendered as "24".
      hourCycle: 'h23',
    });
  }
  return tiebaTimeFormatter;
}

/**
 * Format a Unix timestamp (seconds) as `YYYY-MM-DD HH:mm` in `TIEBA_TIME_ZONE`.
 *
 * @param {unknown} value - Seconds since the epoch, as a number or numeric string.
 * @returns {string} The formatted time, or `''` for missing, non-positive or
 *   out-of-range input.
 */
function formatTiebaUnixTime(value) {
  const seconds = Number(value || 0);
  if (!Number.isFinite(seconds) || seconds <= 0) {
    return '';
  }
  const date = new Date(seconds * 1000);
  // Out-of-range timestamps yield an invalid Date, which formatToParts rejects.
  if (Number.isNaN(date.getTime())) {
    return '';
  }
  const parts = getTiebaTimeFormatter().formatToParts(date);
  const values = Object.fromEntries(parts.map((part) => [part.type, part.value]));
  return `${values.year}-${values.month}-${values.day} ${values.hour}:${values.minute}`;
}

/**
 * Extract the leading time from a reply's meta text, ignoring a `第N楼` prefix.
 *
 * @param {unknown} text
 * @returns {string} The recognised time, or the text without the floor prefix.
 */
function parseTiebaReplyTime(text) {
  const withoutFloor = normalizeText(text).replace(/^第\d+楼\s+/, '').trim();
  const match = withoutFloor.match(/^(刚刚|昨天|前天|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/);
  return match ? match[1].trim() : withoutFloor;
}

/**
 * Build the floor-1 item from the main post payload.
 *
 * @param {any} mainPost
 * @returns {{ floor: number, author: string, content: string, time: string } | null}
 *   `null` when the payload is missing or yields no content.
 */
function buildMainPostItem(mainPost) {
  if (!mainPost) {
    return null;
  }
  const title = normalizeText(mainPost.title);
  const author = normalizeText(mainPost.author) || normalizeText(mainPost.fallbackAuthor);
  const body = normalizeText(mainPost.contentText) || normalizeText(mainPost.structuredText);
  // Posts with no text but with media still get a placeholder body.
  const bodyOrPlaceholder = body || (mainPost.hasMedia ? '[media]' : '');
  const content = [title, bodyOrPlaceholder].filter(Boolean).join(' ').trim();
  if (!content) {
    return null;
  }
  return {
    floor: 1,
    author,
    content,
    time: normalizeText(mainPost.visibleTime) || formatTiebaUnixTime(mainPost.structuredTime),
  };
}

/**
 * Build the rows for reading a thread: the main post, then its replies.
 *
 * @param {{ mainPost?: any, replies?: any[] }} payload - Scraped thread data.
 * @param {{ limit?: unknown, includeMainPost?: boolean }} [options]
 *   `limit` caps the replies only (minimum 1, default 30 when missing or
 *   invalid); the main post is added on top unless `includeMainPost` is `false`.
 * @returns {Array<{ floor: number, author: string, content: string, time: string }>}
 */
export function buildTiebaReadItems(payload, options = {}) {
  const requested = toFiniteNumber(options?.limit) ?? DEFAULT_TIEBA_READ_LIMIT;
  const limit = Math.max(1, Math.trunc(requested));
  const includeMainPost = options?.includeMainPost !== false;

  const items = [];
  const mainPost = buildMainPostItem(payload?.mainPost);
  if (includeMainPost && mainPost) {
    items.push(mainPost);
  }

  const replyItems = [];
  for (const reply of toRecordList(payload?.replies)) {
    if (replyItems.length >= limit) {
      break;
    }
    const floor = Number(reply.floor || 0);
    const content = normalizeText(reply.content);
    if (!Number.isFinite(floor) || floor < 1 || !content) {
      continue;
    }
    replyItems.push({
      floor,
      author: normalizeText(reply.author),
      content,
      time: parseTiebaReplyTime(String(reply.time ?? '')),
    });
  }
  return [...items, ...replyItems];
}