/ clis / hupu / utils.js
utils.js
  1  import { AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
  2  export function stripHtml(html) {
  3      if (!html)
  4          return '';
  5      const decoded = html
  6          .replace(/\\u003c/g, '<')
  7          .replace(/\\u003e/g, '>')
  8          .replace(/\\n/g, '\n')
  9          .replace(/\\r/g, '');
 10      return decoded.replace(/<[^>]+>/g, '').trim();
 11  }
 12  export function decodeHtmlEntities(html) {
 13      if (!html)
 14          return '';
 15      return html.replace(/&nbsp;/g, ' ')
 16          .replace(/&lt;/g, '<')
 17          .replace(/&gt;/g, '>')
 18          .replace(/&amp;/g, '&')
 19          .replace(/&quot;/g, '"')
 20          .replace(/&#x27;/g, "'");
 21  }
 22  export function getHupuThreadUrl(tid) {
 23      return `https://bbs.hupu.com/${encodeURIComponent(String(tid))}-1.html`;
 24  }
 25  export function getHupuSearchUrl(query, page, forum, sort) {
 26      const searchParams = new URLSearchParams();
 27      searchParams.append('q', String(query));
 28      searchParams.append('page', String(page));
 29      if (forum) {
 30          searchParams.append('topicId', String(forum));
 31      }
 32      if (sort) {
 33          searchParams.append('sortby', String(sort));
 34      }
 35      return `https://bbs.hupu.com/search?${searchParams.toString()}`;
 36  }
 37  export async function readHupuNextData(page, url, actionLabel, options = {}) {
 38      await page.goto(url);
 39      const result = await page.evaluate(`
 40      (async () => {
 41        const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
 42        const expectedTid = ${JSON.stringify(options.expectedTid || '')};
 43        const timeoutMs = ${JSON.stringify(options.timeoutMs ?? 5000)};
 44        let lastSeenTid = '';
 45        let lastSeenHref = '';
 46  
 47        const waitFor = async (predicate, limitMs = timeoutMs) => {
 48          const start = Date.now();
 49          while (Date.now() - start < limitMs) {
 50            if (predicate()) return true;
 51            await wait(100);
 52          }
 53          return false;
 54        };
 55  
 56        const ready = await waitFor(() => {
 57          const script = document.getElementById('__NEXT_DATA__');
 58          if (!script?.textContent) return false;
 59  
 60          lastSeenHref = location.href;
 61  
 62          try {
 63            const parsed = JSON.parse(script.textContent);
 64            const threadTid = parsed?.props?.pageProps?.detail?.thread?.tid;
 65            lastSeenTid = typeof threadTid === 'string' ? threadTid : '';
 66  
 67            if (!expectedTid) return true;
 68            return threadTid === expectedTid;
 69          } catch {
 70            return false;
 71          }
 72        });
 73        if (!ready) {
 74          return {
 75            ok: false,
 76            error: expectedTid
 77              ? \`帖子数据未就绪或tid不匹配(expected=\${expectedTid}, actual=\${lastSeenTid || 'unknown'}, href=\${lastSeenHref || location.href})\`
 78              : '无法找到帖子数据'
 79          };
 80        }
 81  
 82        try {
 83          const text = document.getElementById('__NEXT_DATA__')?.textContent || '';
 84          return {
 85            ok: true,
 86            data: JSON.parse(text)
 87          };
 88        } catch (error) {
 89          return {
 90            ok: false,
 91            error: error instanceof Error ? error.message : String(error)
 92          };
 93        }
 94      })()
 95    `);
 96      if (!result || typeof result !== 'object' || !result.ok) {
 97          throw new CommandExecutionError(`${actionLabel} failed: ${result?.error || 'invalid browser response'}`);
 98      }
 99      return result.data;
100  }
/**
 * Navigate to a Hupu search page and return its embedded search payload.
 *
 * The in-page script polls every 100 ms until either `window.$$data` is
 * defined or an inline script containing `window.$$data=` is found. The
 * global is preferred; otherwise the object literal following the marker is
 * cut out by brace matching (string- and escape-aware) and JSON-parsed.
 *
 * @param {object} page - Browser page handle; `goto(url)` and `evaluate(script)` are called on it.
 * @param {string} url - Search page to open.
 * @param {string} actionLabel - Prefix used in the error message on failure.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=5000] - Maximum time to poll, in milliseconds.
 * @returns {Promise<*>} The search data object.
 * @throws {CommandExecutionError} When the data is never found, fails to
 *   parse, or the browser returns an unusable response.
 */
export async function readHupuSearchData(page, url, actionLabel, options = {}) {
    await page.goto(url);
    const timeoutMs = options.timeoutMs ?? 5000;
    const result = await page.evaluate(`
    (async () => {
      const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      const timeoutMs = ${JSON.stringify(timeoutMs)};
      const waitFor = async (predicate, limitMs = timeoutMs) => {
        const start = Date.now();
        while (Date.now() - start < limitMs) {
          if (predicate()) return true;
          await wait(100);
        }
        return false;
      };

      const extractFromScript = () => {
        const marker = 'window.$$data=';
        for (const script of Array.from(document.scripts)) {
          const text = script.textContent || '';
          const dataIndex = text.indexOf(marker);
          if (dataIndex === -1) continue;

          const jsonStart = dataIndex + marker.length;
          let braceCount = 0;
          let jsonEnd = jsonStart;
          let inString = false;
          let escapeNext = false;

          for (let i = jsonStart; i < text.length; i++) {
            const char = text[i];

            if (escapeNext) {
              escapeNext = false;
              continue;
            }

            if (char === '\\\\') {
              escapeNext = true;
              continue;
            }

            if (char === '"') {
              inString = !inString;
              continue;
            }

            if (!inString) {
              if (char === '{') {
                braceCount++;
              } else if (char === '}') {
                braceCount--;
                if (braceCount === 0) {
                  jsonEnd = i;
                  break;
                }
              }
            }
          }

          if (jsonEnd > jsonStart) {
            return text.substring(jsonStart, jsonEnd + 1);
          }
        }
        return '';
      };

      const ready = await waitFor(() => {
        return typeof window.$$data !== 'undefined' || Boolean(extractFromScript());
      });
      if (!ready) {
        return { ok: false, error: '无法找到搜索数据' };
      }

      try {
        if (typeof window.$$data !== 'undefined') {
          return {
            ok: true,
            data: JSON.parse(JSON.stringify(window.$$data))
          };
        }

        const jsonString = extractFromScript();
        return {
          ok: true,
          data: JSON.parse(jsonString)
        };
      } catch (error) {
        return {
          ok: false,
          error: error instanceof Error ? error.message : String(error)
        };
      }
    })()
  `);
    if (!result || typeof result !== 'object' || !result.ok) {
        throw new CommandExecutionError(`${actionLabel} failed: ${result?.error || 'invalid browser response'}`);
    }
    return result.data;
}
/**
 * Build the source of an async IIFE that POSTs `body` as JSON to `apiUrl`
 * from inside the browser page (with `credentials: 'include'`).
 *
 * Before sending, empty or missing payload fields are filled from the page:
 * - mode `'reply'`: `shumeiId` and `deviceid` from the first `.thumbcache_*`
 *   cookie, falling back to a `.thumbcache_*` localStorage entry;
 * - any other mode: `puid` from the `ua` cookie, `shumei_id` from `smidV2`,
 *   and `deviceid` from the `sensorsdata2015jssdkcross` cookie JSON.
 *
 * The sensors cookie is read best-effort: a value that cannot be URI-decoded
 * or JSON-parsed simply leaves `deviceid` unfilled instead of aborting the
 * request.
 *
 * The script resolves to `{ ok, status, data }` for a completed request
 * (`data` is the parsed JSON body, `{ message: text }` for non-JSON text, or
 * `null` when empty) or `{ ok: false, error }` when `fetch` throws.
 *
 * @param {string} apiUrl - Endpoint to POST to.
 * @param {object} body - Payload; serialised into the script with JSON.stringify.
 * @param {string} mode - `'reply'` selects the reply defaults; anything else the default ones.
 * @returns {string} JavaScript source to pass to the page's evaluate call.
 */
function buildBrowserJsonPostScript(apiUrl, body, mode) {
    return `
    (async () => {
      const url = ${JSON.stringify(apiUrl)};
      const payload = ${JSON.stringify(body)};
      const mode = ${JSON.stringify(mode)};
      const getCookie = (name) => document.cookie
        .split('; ')
        .find((item) => item.startsWith(name + '='))
        ?.slice(name.length + 1) || '';

      const findThumbcacheValue = () => {
        const rawEntry = document.cookie
          .split('; ')
          .find((item) => item.startsWith('.thumbcache_'));
        if (rawEntry && rawEntry.includes('=')) {
          const rawValue = rawEntry.slice(rawEntry.indexOf('=') + 1);
          try {
            return decodeURIComponent(rawValue);
          } catch {
            return rawValue;
          }
        }

        const storageKey = Object.keys(localStorage).find((key) => key.startsWith('.thumbcache_'));
        if (!storageKey) return '';
        return localStorage.getItem(storageKey) || '';
      };

      const resolveDefaultPayload = (input) => {
        const next = { ...input };
        let deviceid = '';
        try {
          // Decoding lives inside the try: a malformed percent-escape throws URIError.
          const sensorsRaw = decodeURIComponent(getCookie('sensorsdata2015jssdkcross') || '');
          const sensors = JSON.parse(sensorsRaw);
          deviceid = sensors?.props?.['$device_id'] || sensors?.distinct_id || '';
        } catch {
          // Best-effort: an unreadable cookie just leaves deviceid unfilled.
        }

        if ((next.puid === '' || next.puid == null) && getCookie('ua')) {
          next.puid = getCookie('ua');
        }
        if ((next.shumei_id === '' || next.shumei_id == null) && getCookie('smidV2')) {
          next.shumei_id = getCookie('smidV2');
        }
        if ((next.deviceid === '' || next.deviceid == null) && deviceid) {
          next.deviceid = deviceid;
        }
        return next;
      };

      const resolveReplyPayload = (input) => {
        const next = { ...input };
        const thumbcache = findThumbcacheValue();
        if ((next.shumeiId === '' || next.shumeiId == null) && thumbcache) {
          next.shumeiId = thumbcache;
        }
        if ((next.deviceid === '' || next.deviceid == null) && thumbcache) {
          next.deviceid = thumbcache;
        }
        return next;
      };

      const resolvedPayload = mode === 'reply'
        ? resolveReplyPayload(payload)
        : resolveDefaultPayload(payload);

      try {
        const response = await fetch(url, {
          method: 'POST',
          credentials: 'include',
          headers: {
            'Content-Type': 'application/json'
          },
          body: JSON.stringify(resolvedPayload)
        });

        const text = await response.text();
        let data = null;
        try {
          data = text ? JSON.parse(text) : null;
        } catch {
          data = text ? { message: text } : null;
        }

        return {
          ok: response.ok,
          status: response.status,
          data
        };
      } catch (error) {
        return {
          ok: false,
          error: error instanceof Error ? error.message : String(error)
        };
      }
    })()
  `;
}
/**
 * Run an authenticated Hupu JSON POST from inside the browser page, so that
 * cookies and the thread referer come from the live logged-in session.
 *
 * The thread page for `tid` is opened first, then the generated POST script
 * is evaluated in it.
 *
 * @param {object} page - Browser page handle; `goto(url)` and `evaluate(script)` are called on it.
 * @param {string|number} tid - Thread id whose page is opened before posting.
 * @param {string} apiUrl - Endpoint to POST to.
 * @param {object} body - JSON payload.
 * @param {string} actionLabel - Prefix used in error messages.
 * @param {string} [mode='default'] - Payload-default mode forwarded to the script builder.
 * @returns {Promise<object>} The response data, or `{}` when it is null/undefined.
 * @throws {AuthRequiredError} On HTTP 401 or 403.
 * @throws {CommandExecutionError} On an unusable browser response, a fetch
 *   error, or any other non-OK response.
 */
export async function postHupuJson(page, tid, apiUrl, body, actionLabel, mode = 'default') {
    const failure = (detail) => new CommandExecutionError(`${actionLabel} failed: ${detail}`);

    const threadUrl = getHupuThreadUrl(tid);
    await page.goto(threadUrl);

    const script = buildBrowserJsonPostScript(apiUrl, body, mode);
    const result = await page.evaluate(script);

    const isUsable = Boolean(result) && typeof result === 'object';
    if (!isUsable) {
        throw failure('invalid browser response');
    }
    // Auth failures take precedence over every other failure kind.
    if ([401, 403].includes(result.status)) {
        throw new AuthRequiredError('bbs.hupu.com', `${actionLabel} failed: please log in to Hupu first`);
    }
    if (result.error) {
        throw failure(result.error);
    }
    if (result.ok) {
        return result.data ?? {};
    }
    const serverDetail = result.data?.msg || result.data?.message;
    throw failure(serverDetail || `HTTP ${result.status ?? 'unknown'}`);
}