/ clis / tieba / utils.js
utils.js
  1  import { createHash } from 'node:crypto';
  2  /**
  3   * Shared Tieba parsing helpers used by the browser adapters.
  4   */
  5  export const MAX_TIEBA_LIMIT = 20;
  6  const TIEBA_PC_SIGN_SALT = '36770b1f34c9bbf2e7d1a99d2b82fa9e';
  7  const TIEBA_TIME_ZONE = 'Asia/Shanghai';
  8  /**
  9   * Keep the public CLI limit contract aligned with the real implementation.
 10   */
 11  export function normalizeTiebaLimit(value, fallback = MAX_TIEBA_LIMIT) {
 12      const parsed = Number(value ?? fallback);
 13      if (!Number.isFinite(parsed) || parsed < 1)
 14          return fallback;
 15      return Math.min(Math.trunc(parsed), MAX_TIEBA_LIMIT);
 16  }
 17  export function normalizeText(value) {
 18      return typeof value === 'string' ? value.replace(/\s+/g, ' ').trim() : '';
 19  }
 20  /**
 21   * Match Tieba PC's signed request contract so forum list fetching stays stable.
 22   */
 23  export function signTiebaPcParams(params) {
 24      const payload = Object.keys(params)
 25          .sort((left, right) => left.localeCompare(right))
 26          .map((key) => `${key}=${params[key]}`)
 27          .join('') + TIEBA_PC_SIGN_SALT;
 28      return createHash('md5').update(payload).digest('hex');
 29  }
 30  export function parseTiebaCount(text) {
 31      const value = normalizeText(text).toUpperCase();
 32      if (!value)
 33          return 0;
 34      const compact = value.replace(/[^\d.W万]/g, '');
 35      if (compact.endsWith('万')) {
 36          return Math.round(parseFloat(compact.slice(0, -1)) * 10000);
 37      }
 38      if (compact.endsWith('W')) {
 39          return Math.round(parseFloat(compact.slice(0, -1)) * 10000);
 40      }
 41      return parseInt(compact.replace(/[^\d]/g, ''), 10) || 0;
 42  }
 43  export function parseTiebaLastReply(text) {
 44      const normalized = normalizeText(text).replace(/^回复于/, '').trim();
 45      const match = normalized.match(/(刚刚|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/);
 46      return match ? match[1].trim() : normalized;
 47  }
 48  function buildTiebaThreadUrl(id, rawUrl) {
 49      const explicitUrl = normalizeText(rawUrl);
 50      if (explicitUrl)
 51          return explicitUrl;
 52      return id ? `https://tieba.baidu.com/p/${id}` : '';
 53  }
 54  function resolveTiebaThreadId(raw) {
 55      const direct = normalizeText(raw.threadId);
 56      if (direct)
 57          return direct;
 58      const fromUrl = normalizeText(raw.url).match(/\/p\/(\d+)/);
 59      return fromUrl ? fromUrl[1] : '';
 60  }
 61  function getTiebaFeedComponent(feed, name) {
 62      const components = Array.isArray(feed?.components) ? feed.components : [];
 63      const match = components.find((entry) => normalizeText(entry.component) === name);
 64      if (!match)
 65          return {};
 66      const payload = match[name];
 67      return payload && typeof payload === 'object' ? payload : {};
 68  }
 69  function extractTiebaFeedAuthor(feed) {
 70      const head = getTiebaFeedComponent(feed, 'feed_head');
 71      const mainData = Array.isArray(head.main_data) ? head.main_data : [];
 72      for (const item of mainData) {
 73          const textRecord = item.text;
 74          const author = normalizeText(textRecord?.text);
 75          if (author)
 76              return author;
 77      }
 78      return '';
 79  }
 80  function extractTiebaFeedTitle(feed) {
 81      const title = getTiebaFeedComponent(feed, 'feed_title');
 82      const titleData = Array.isArray(title.data) ? title.data : [];
 83      const firstTitle = titleData[0];
 84      const textInfo = firstTitle?.text_info;
 85      return normalizeText(textInfo?.text) || normalizeText(feed?.business_info_map?.title);
 86  }
 87  function extractTiebaFeedCommentCount(feed) {
 88      const social = getTiebaFeedComponent(feed, 'feed_social');
 89      const commentCount = Number(social.comment_num ?? feed?.business_info_map?.comment_num ?? 0);
 90      return Number.isFinite(commentCount) ? commentCount : 0;
 91  }
 92  function extractTiebaFeedThreadId(feed) {
 93      const direct = normalizeText(feed?.business_info_map?.thread_id);
 94      if (direct)
 95          return direct;
 96      const logParams = Array.isArray(feed?.log_param) ? feed.log_param : [];
 97      const fromLog = normalizeText(logParams.find((item) => normalizeText(item?.key) === 'tid')?.value);
 98      if (fromLog)
 99          return fromLog;
100      const fromSchema = normalizeText(feed?.schema).match(/[?&]tid=(\d+)/);
101      return fromSchema ? fromSchema[1] : '';
102  }
/**
 * Build the "last reply" label from the first `extra_data` entry of `feed_head`.
 *
 * The entry's `business_info_map.time_prefix` is followed by its `text.text`; a
 * value made only of digits is passed through formatTiebaUnixTime first.
 *
 * @param {object | null | undefined} feed - Feed record.
 * @returns {string} Prefix and time concatenated without a separator; '' when both are empty.
 */
function extractTiebaFeedLastReply(feed) {
    const { extra_data: extras } = getTiebaFeedComponent(feed, 'feed_head');
    const lead = Array.isArray(extras) ? extras[0] : undefined;
    const prefix = normalizeText(lead?.business_info_map?.time_prefix);
    let timeText = normalizeText(lead?.text?.text);
    if (/^\d+$/.test(timeText)) {
        timeText = formatTiebaUnixTime(timeText);
    }
    // Both parts are always strings, so empty ones simply contribute nothing.
    return `${prefix}${timeText}`;
}
/**
 * Convert Tieba's signed `page_pc` feed entries into the stable card shape used by the CLI.
 *
 * Only entries whose `layout` is "feed" and that carry a truthy `feed` are converted,
 * and cards whose title comes out empty are dropped.
 *
 * @param {Array<{ layout?: unknown, feed?: object }>} rawFeeds - Entries from the page_pc response.
 * @returns {Array<{ title: string, author: string, descInfo: string, commentCount: number,
 *   actionTexts: string[], threadId: string, url: string }>} Cards in input order.
 */
export function buildTiebaPostCardsFromPagePc(rawFeeds) {
    return rawFeeds.flatMap((entry) => {
        const isFeedEntry = normalizeText(entry.layout) === 'feed' && Boolean(entry.feed);
        if (!isFeedEntry) {
            return [];
        }
        const { feed } = entry;
        const threadId = extractTiebaFeedThreadId(feed);
        const card = {
            title: extractTiebaFeedTitle(feed),
            author: extractTiebaFeedAuthor(feed),
            descInfo: extractTiebaFeedLastReply(feed),
            commentCount: extractTiebaFeedCommentCount(feed),
            actionTexts: [],
            threadId,
            url: buildTiebaThreadUrl(threadId),
        };
        // Returning an empty array drops untitled cards from the flattened result.
        return normalizeText(card.title) ? [card] : [];
    });
}
/**
 * Turn raw post cards into ranked list rows.
 *
 * Cards without a title are dropped, the remainder is cut to the normalized limit,
 * and `rank` is assigned from 1 in the resulting order.
 *
 * `replies` comes from `commentCount` when that field is present and numeric.
 * A null, undefined or blank `commentCount` is treated as missing (Number() would
 * silently turn those into 0), in which case the count is parsed from the action
 * text mentioning "评论", or from the last action text when none does.
 *
 * @param {Array<object>} rawCards - Cards with title/author/commentCount/actionTexts/descInfo/threadId/url.
 * @param {unknown} [requestedLimit] - Requested row count, normalized by normalizeTiebaLimit.
 * @returns {Array<{ rank: number, title: string, author: string, replies: number,
 *   last_reply: string, id: string, url: string }>} Ranked rows.
 */
export function buildTiebaPostItems(rawCards, requestedLimit) {
    const limit = normalizeTiebaLimit(requestedLimit);
    const resolveReplies = (raw) => {
        const { commentCount } = raw;
        const isMissing = commentCount == null || String(commentCount).trim() === '';
        const explicitCount = isMissing ? Number.NaN : Number(commentCount);
        if (Number.isFinite(explicitCount)) {
            return explicitCount;
        }
        const actionTexts = Array.isArray(raw.actionTexts)
            ? raw.actionTexts.map(normalizeText).filter(Boolean)
            : [];
        const commentText = actionTexts.find((text) => /评论/.test(text))
            || actionTexts[actionTexts.length - 1]
            || '';
        return parseTiebaCount(commentText);
    };
    return rawCards
        .map((raw) => {
            const id = resolveTiebaThreadId(raw);
            return {
                title: normalizeText(raw.title),
                author: normalizeText(raw.author),
                replies: resolveReplies(raw),
                last_reply: parseTiebaLastReply(String(raw.descInfo ?? '')),
                id,
                url: buildTiebaThreadUrl(id, raw.url),
            };
        })
        .filter((item) => item.title)
        .slice(0, limit)
        .map((item, index) => ({ rank: index + 1, ...item }));
}
/**
 * Turn raw search hits into ranked rows.
 *
 * Every text field is normalized, untitled hits are dropped, the remainder is cut
 * to the normalized limit, and `rank` is assigned from 1. `id` is the hit's own
 * string `id` when present, otherwise the digits after "/p/" in its URL.
 *
 * The snippet is capped at 200 UTF-16 code units. When the cap would fall between
 * the two halves of a surrogate pair, the dangling high surrogate is dropped so the
 * snippet never ends in half a character.
 *
 * @param {Array<object>} rawItems - Hits with title/forum/author/time/snippet/id/url.
 * @param {unknown} [requestedLimit] - Requested row count, normalized by normalizeTiebaLimit.
 * @returns {Array<{ rank: number, title: string, forum: string, author: string,
 *   time: string, snippet: string, id: string, url: string }>} Ranked rows.
 */
export function buildTiebaSearchItems(rawItems, requestedLimit) {
    const SNIPPET_MAX_UNITS = 200;
    const limit = normalizeTiebaLimit(requestedLimit);
    const clipSnippet = (text) => {
        if (text.length <= SNIPPET_MAX_UNITS) {
            return text;
        }
        const lastKeptUnit = text.charCodeAt(SNIPPET_MAX_UNITS - 1);
        const splitsSurrogatePair = lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff;
        return text.slice(0, splitsSurrogatePair ? SNIPPET_MAX_UNITS - 1 : SNIPPET_MAX_UNITS);
    };
    return rawItems
        .map((raw) => {
            const url = normalizeText(raw.url);
            const directId = normalizeText(raw.id);
            const idFromUrl = url.match(/\/p\/(\d+)/)?.[1] || '';
            return {
                title: normalizeText(raw.title),
                forum: normalizeText(raw.forum),
                author: normalizeText(raw.author),
                time: normalizeText(raw.time),
                snippet: clipSnippet(normalizeText(raw.snippet)),
                id: directId || idFromUrl,
                url,
            };
        })
        .filter((item) => item.title)
        .slice(0, limit)
        .map((item, index) => ({ rank: index + 1, ...item }));
}
/**
 * Render a Unix timestamp in seconds as "YYYY-MM-DD HH:mm" in TIEBA_TIME_ZONE.
 *
 * @param {unknown} value - Seconds since the epoch (number or numeric string).
 * @returns {string} Formatted time, or '' when `value` is falsy, not finite, or not positive.
 */
function formatTiebaUnixTime(value) {
    const seconds = Number(value || 0);
    const isUsable = Number.isFinite(seconds) && seconds > 0;
    if (!isUsable) {
        return '';
    }
    const formatter = new Intl.DateTimeFormat('sv-SE', {
        timeZone: TIEBA_TIME_ZONE,
        year: 'numeric',
        month: '2-digit',
        day: '2-digit',
        hour: '2-digit',
        minute: '2-digit',
        hour12: false,
    });
    // Index the formatted parts by type; a later part of the same type overwrites an earlier one.
    const fields = {};
    for (const { type, value: text } of formatter.formatToParts(new Date(seconds * 1000))) {
        fields[type] = text;
    }
    const datePart = `${fields.year}-${fields.month}-${fields.day}`;
    const timePart = `${fields.hour}:${fields.minute}`;
    return `${datePart} ${timePart}`;
}
/**
 * Extract the leading time expression from a reply's time label.
 *
 * A leading "第N楼 " floor marker is removed first. The label must then start with
 * "刚刚", "昨天", "前天", an "N 分钟/小时/天前" phrase, an MM-DD date or a YYYY-MM-DD
 * date (each date with an optional HH:MM); otherwise the cleaned label is returned.
 *
 * @param {unknown} text - Raw time label.
 * @returns {string} The leading time expression or the cleaned label.
 */
function parseTiebaReplyTime(text) {
    const leadingTime = /^(刚刚|昨天|前天|\d+\s*(?:分钟|小时|天)前|\d{2}-\d{2}(?:\s+\d{2}:\d{2})?|\d{4}-\d{2}-\d{2}(?:\s+\d{2}:\d{2})?)/;
    const label = normalizeText(text).replace(/^第\d+楼\s+/, '').trim();
    const found = leadingTime.exec(label);
    if (found === null) {
        return label;
    }
    return found[1].trim();
}
/**
 * Build the floor-1 row for a thread's opening post.
 *
 * Content is the title followed by the body text, joined with one space. When the
 * body is empty and `hasMedia` is truthy, "[media]" stands in for the body.
 *
 * @param {object | null | undefined} mainPost - Raw opening post with
 *   title/author/fallbackAuthor/contentText/structuredText/hasMedia/visibleTime/structuredTime.
 * @returns {{ floor: number, author: string, content: string, time: string } | null}
 *   The row, or null when `mainPost` is falsy or has no content at all.
 */
function buildMainPostItem(mainPost) {
    if (!mainPost) {
        return null;
    }
    const segments = [];
    const title = normalizeText(mainPost.title);
    if (title) {
        segments.push(title);
    }
    const body = normalizeText(mainPost.contentText) || normalizeText(mainPost.structuredText);
    if (body) {
        segments.push(body);
    } else if (mainPost.hasMedia) {
        segments.push('[media]');
    }
    if (segments.length === 0) {
        return null;
    }
    const author = normalizeText(mainPost.author) || normalizeText(mainPost.fallbackAuthor);
    // The visible time wins; the structured timestamp is only formatted as a fallback.
    const time = normalizeText(mainPost.visibleTime) || formatTiebaUnixTime(mainPost.structuredTime);
    return { floor: 1, author, content: segments.join(' '), time };
}
/**
 * Build the rows shown when reading a thread: the opening post followed by replies.
 *
 * Replies are kept when `floor` is a finite number >= 1 and the normalized content
 * is non-empty. At most `limit` replies are returned; the opening post, when
 * included, does not count against that limit.
 *
 * A null or undefined `options.limit` now means "use the default of 30" (Number(null)
 * is 0, which previously collapsed the limit to 1). A null `options`, a null
 * `payload` and null entries in `payload.replies` are tolerated instead of throwing.
 *
 * @param {{ mainPost?: object, replies?: Array<object> } | null | undefined} payload - Raw thread data.
 * @param {{ limit?: unknown, includeMainPost?: boolean } | null} [options] - `limit` caps the
 *   replies (default 30, minimum 1); the opening post is included unless `includeMainPost` is exactly false.
 * @returns {Array<{ floor: number, author: string, content: string, time: string }>} Rows in thread order.
 */
export function buildTiebaReadItems(payload, options = {}) {
    const DEFAULT_REPLY_LIMIT = 30;
    const { limit: requestedLimit, includeMainPost } = options ?? {};
    const numericLimit = requestedLimit == null ? Number.NaN : Number(requestedLimit);
    const limit = Math.max(
        1,
        Math.trunc(Number.isFinite(numericLimit) ? numericLimit : DEFAULT_REPLY_LIMIT),
    );
    const items = [];
    const mainPost = buildMainPostItem(payload?.mainPost);
    if (includeMainPost !== false && mainPost) {
        items.push(mainPost);
    }
    const replies = Array.isArray(payload?.replies) ? payload.replies : [];
    const replyItems = [];
    for (const reply of replies) {
        const floor = Number(reply?.floor || 0);
        const content = normalizeText(reply?.content);
        if (!Number.isFinite(floor) || floor < 1 || !content) {
            continue;
        }
        replyItems.push({
            floor,
            author: normalizeText(reply.author),
            content,
            time: parseTiebaReplyTime(String(reply.time ?? '')),
        });
    }
    return [...items, ...replyItems.slice(0, limit)];
}