/ clis / weread / utils.js
utils.js
  1  /**
  2   * WeRead shared helpers: fetch wrappers and formatting.
  3   *
  4   * Two API domains:
  5   * - WEB_API (weread.qq.com/web/*): public, Node.js fetch
  6   * - API (i.weread.qq.com/*): private, Node.js fetch with cookies from browser
  7   */
  8  import { CliError } from '@jackwener/opencli/errors';
/** Bare WeRead host name; also used in this file as the `domain` filter when reading browser cookies. */
export const WEREAD_DOMAIN = 'weread.qq.com';
/** Origin (scheme + host) of the WeRead web app. */
export const WEREAD_WEB_ORIGIN = `https://${WEREAD_DOMAIN}`;
/** Shelf page URL; loadWebShelfSnapshotWithVid navigates here before polling localStorage. */
export const WEREAD_SHELF_URL = `${WEREAD_WEB_ORIGIN}/web/shelf`;
// Base URL that fetchWebApi prefixes to the caller-supplied path.
const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
// Base URL (the `i.` subdomain) that fetchPrivateApi prefixes to the caller-supplied path.
const API = `https://i.${WEREAD_DOMAIN}`;
/** Desktop Chrome User-Agent string sent by fetchWebApi and fetchPrivateApi. */
export const WEREAD_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
// `errcode` values that isAuthErrorResponse treats as an authentication failure.
const WEREAD_AUTH_ERRCODES = new Set([-2010, -2012]);
 16  function buildCookieHeader(cookies) {
 17      return cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join('; ');
 18  }
 19  function isAuthErrorResponse(resp, data) {
 20      return resp.status === 401 || WEREAD_AUTH_ERRCODES.has(Number(data?.errcode));
 21  }
 22  function getCurrentVid(cookies) {
 23      return String(cookies.find((cookie) => cookie.name === 'wr_vid')?.value || '').trim();
 24  }
 25  function getWebShelfStorageKeys(currentVid) {
 26      return {
 27          rawBooksKey: `shelf:rawBooks:${currentVid}`,
 28          shelfIndexesKey: `shelf:shelfIndexes:${currentVid}`,
 29      };
 30  }
 31  function normalizeWebShelfSnapshot(value) {
 32      return {
 33          cacheFound: value?.cacheFound === true,
 34          rawBooks: Array.isArray(value?.rawBooks) ? value.rawBooks : [],
 35          shelfIndexes: Array.isArray(value?.shelfIndexes) ? value.shelfIndexes : [],
 36      };
 37  }
/**
 * Build the source text of a script that reads the shelf cache from localStorage.
 *
 * The returned string is an immediately-invoked function that yields a Promise.
 * The script re-reads both storage keys every 100 ms and resolves with
 * `{ cacheFound, rawBooks, shelfIndexes }` as soon as the cache is "ready", or
 * once 5000 ms have elapsed, whichever comes first. "Ready" means rawBooks
 * parsed to an array and, when `requireTrustedIndexes` is true, the finite-`idx`
 * entries of shelfIndexes cover every unique bookId found in rawBooks.
 *
 * The arguments are embedded via JSON.stringify, so they appear in the script
 * as literals rather than being looked up at run time.
 *
 * @param {{rawBooksKey: string, shelfIndexesKey: string}} storageKeys - localStorage key names to read.
 * @param {boolean} requireTrustedIndexes - Whether to keep polling until shelfIndexes fully covers rawBooks.
 * @returns {string} Script source; the callers in this file pass it to `page.evaluate`.
 */
function buildShelfSnapshotPollScript(storageKeys, requireTrustedIndexes) {
    return `
    (() => new Promise((resolve) => {
      const deadline = Date.now() + 5000;
      const rawBooksKey = ${JSON.stringify(storageKeys.rawBooksKey)};
      const shelfIndexesKey = ${JSON.stringify(storageKeys.shelfIndexesKey)};
      const requireTrustedIndexes = ${JSON.stringify(requireTrustedIndexes)};

      const readJson = (raw) => {
        if (typeof raw !== 'string') return null;
        try {
          return JSON.parse(raw);
        } catch {
          return null;
        }
      };

      const collectBookIds = (items) => Array.isArray(items)
        ? Array.from(new Set(items.map((item) => String(item?.bookId || '').trim()).filter(Boolean)))
        : [];

      // Mirror of getTrustedIndexedBookIds in Node.js — keep in sync
      const hasTrustedIndexes = (rawBooks, shelfIndexes) => {
        const rawBookIds = collectBookIds(rawBooks);
        if (rawBookIds.length === 0) return false;

        const rawBookIdSet = new Set(rawBookIds);
        const projectedIndexedBookIds = Array.isArray(shelfIndexes)
          ? Array.from(new Set(
              shelfIndexes
                .filter((entry) => Number.isFinite(entry?.idx))
                .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER))
                .map((entry) => String(entry?.bookId || '').trim())
                .filter((bookId) => rawBookIdSet.has(bookId)),
            ))
          : [];

        return projectedIndexedBookIds.length === rawBookIds.length;
      };

      const poll = () => {
        const rawBooks = readJson(localStorage.getItem(rawBooksKey));
        const shelfIndexes = readJson(localStorage.getItem(shelfIndexesKey));
        const cacheFound = Array.isArray(rawBooks);
        const ready = cacheFound && (!requireTrustedIndexes || hasTrustedIndexes(rawBooks, shelfIndexes));

        if (ready || Date.now() >= deadline) {
          resolve({
            cacheFound,
            rawBooks: Array.isArray(rawBooks) ? rawBooks : [],
            shelfIndexes: Array.isArray(shelfIndexes) ? shelfIndexes : [],
          });
          return;
        }

        setTimeout(poll, 100);
      };

      poll();
    }))
  `;
}
/**
 * Fetch a public WeRead web endpoint (Node.js direct fetch).
 * Used by search and ranking commands (browser: false).
 *
 * @param {string} path - Path appended to the web API base URL.
 * @param {Record<string, unknown>} [params] - Optional query parameters; values are
 *   stringified by URLSearchParams.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {CliError} `FETCH_ERROR` when the request itself fails or the HTTP status
 *   is not OK; `PARSE_ERROR` when the body is not valid JSON.
 */
export async function fetchWebApi(path, params) {
    const url = new URL(`${WEB_API}${path}`);
    if (params) {
        for (const [key, value] of Object.entries(params)) {
            url.searchParams.set(key, value);
        }
    }
    let resp;
    try {
        resp = await fetch(url.toString(), {
            headers: { 'User-Agent': WEREAD_UA },
        });
    }
    catch (error) {
        // Network-level failures (DNS, TLS, connection reset) reject with a raw
        // TypeError; wrap them the same way fetchPrivateApi does so callers
        // always receive a CliError.
        throw new CliError('FETCH_ERROR', `Failed to fetch ${path}: ${error instanceof Error ? error.message : String(error)}`, 'WeRead API may be temporarily unavailable');
    }
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    try {
        return await resp.json();
    }
    catch {
        throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }
}
/**
 * Fetch a private WeRead API endpoint with cookies extracted from the browser.
 * The HTTP request itself runs in Node.js to avoid page-context CORS failures.
 *
 * Cookies are collected from both the API subdomain (i.weread.qq.com) and the
 * main domain (weread.qq.com). WeRead may set auth cookies as host-only on
 * weread.qq.com, which won't match i.weread.qq.com in a URL-based lookup.
 *
 * @param {{getCookies: Function}} page - Browser page handle used only to read cookies.
 * @param {string} path - Path appended to the private API base URL.
 * @param {Record<string, unknown>} [params] - Optional query parameters; values are
 *   stringified by URLSearchParams.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {CliError} `AUTH_REQUIRED` for HTTP 401 or an auth errcode; `FETCH_ERROR`
 *   when the request fails or the HTTP status is not OK; `PARSE_ERROR` when a
 *   successful response is not valid JSON; `API_ERROR` for any other non-zero errcode.
 */
export async function fetchPrivateApi(page, path, params) {
    const url = new URL(`${API}${path}`);
    if (params) {
        for (const [key, value] of Object.entries(params)) {
            url.searchParams.set(key, value);
        }
    }
    const urlStr = url.toString();
    // Merge cookies from both domains; API-domain cookies take precedence on name collision
    const [apiCookies, domainCookies] = await Promise.all([
        page.getCookies({ url: urlStr }),
        page.getCookies({ domain: WEREAD_DOMAIN }),
    ]);
    const merged = new Map();
    for (const cookie of domainCookies) {
        merged.set(cookie.name, cookie);
    }
    for (const cookie of apiCookies) {
        merged.set(cookie.name, cookie);
    }
    const cookieHeader = buildCookieHeader(Array.from(merged.values()));
    let resp;
    try {
        resp = await fetch(urlStr, {
            headers: {
                'User-Agent': WEREAD_UA,
                'Origin': WEREAD_WEB_ORIGIN,
                'Referer': `${WEREAD_WEB_ORIGIN}/`,
                ...(cookieHeader ? { 'Cookie': cookieHeader } : {}),
            },
        });
    }
    catch (error) {
        throw new CliError('FETCH_ERROR', `Failed to fetch ${path}: ${error instanceof Error ? error.message : String(error)}`, 'WeRead API may be temporarily unavailable');
    }
    let data;
    try {
        data = await resp.json();
    }
    catch {
        // The body is parsed before the status checks because auth failures can
        // arrive as JSON with an errcode. When the body is NOT JSON, the HTTP
        // status is the only signal left, so classify by status first instead
        // of masking a 401 / 5xx as a parse problem.
        if (isAuthErrorResponse(resp, undefined)) {
            throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
        }
        if (!resp.ok) {
            throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
        }
        throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }
    if (isAuthErrorResponse(resp, data)) {
        throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
    }
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    if (data?.errcode != null && data.errcode !== 0) {
        throw new CliError('API_ERROR', data.errmsg ?? `WeRead API error ${data.errcode}`);
    }
    return data;
}
/**
 * List the distinct, non-empty bookIds of `snapshot.rawBooks` in first-seen order.
 *
 * @param {{rawBooks: Array<{bookId?: unknown}|null|undefined>}} snapshot - Shelf snapshot.
 * @returns {string[]} Trimmed bookIds with blanks and duplicates removed.
 */
function getUniqueRawBookIds(snapshot) {
    const normalizedIds = snapshot.rawBooks.map((book) => String(book?.bookId || '').trim());
    const nonEmptyIds = normalizedIds.filter((bookId) => bookId.length > 0);
    return [...new Set(nonEmptyIds)];
}
/**
 * Return the bookIds of `snapshot.shelfIndexes` ordered by `idx`, but only when
 * they cover exactly the unique bookIds of `snapshot.rawBooks`; otherwise `[]`.
 *
 * Mirror of hasTrustedIndexes in buildShelfSnapshotPollScript — keep in sync.
 *
 * @param {{rawBooks: unknown[], shelfIndexes: unknown[]}} snapshot - Shelf snapshot.
 * @returns {string[]} Index-ordered bookIds, or an empty array when the indexes are not trusted.
 */
function getTrustedIndexedBookIds(snapshot) {
    const rawBookIds = getUniqueRawBookIds(snapshot);
    if (rawBookIds.length === 0) {
        return [];
    }
    const knownBookIds = new Set(rawBookIds);
    const compareByIdx = (left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER);
    // filter() returns a fresh array, so the in-place sort never touches the snapshot
    const entriesInOrder = snapshot.shelfIndexes
        .filter((entry) => Number.isFinite(entry?.idx))
        .sort(compareByIdx);
    const orderedBookIds = [];
    const alreadyListed = new Set();
    for (const entry of entriesInOrder) {
        const bookId = String(entry?.bookId || '').trim();
        if (!knownBookIds.has(bookId) || alreadyListed.has(bookId)) {
            continue;
        }
        alreadyListed.add(bookId);
        orderedBookIds.push(bookId);
    }
    if (orderedBookIds.length !== rawBookIds.length) {
        return [];
    }
    return orderedBookIds;
}
/**
 * Turn a shelf snapshot into an ordered list of shelf records, pairing each
 * record positionally with an optional list of rendered reader URLs.
 *
 * Ordering follows shelfIndexes only when it fully covers the bookId set of
 * rawBooks; otherwise rawBooks order is kept so a partially hydrated index
 * cannot reorder entries.
 *
 * @param {{rawBooks: unknown[], shelfIndexes: unknown[]}} snapshot - Shelf snapshot.
 * @param {string[]} [readerUrls=[]] - Reader URLs, matched to entries by position.
 * @returns {Array<{bookId: string, title: string, author: string, readerUrl: string}>}
 *   One record per unique bookId; missing fields become ''.
 */
export function buildWebShelfEntries(snapshot, readerUrls = []) {
    const rawBookIds = getUniqueRawBookIds(snapshot);
    const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot);
    const orderedBookIds = trustedIndexedBookIds.length > 0 ? trustedIndexedBookIds : rawBookIds;
    // First occurrence of each bookId wins when rawBooks contains duplicates
    const firstBookById = new Map();
    for (const book of snapshot.rawBooks) {
        const bookId = String(book?.bookId || '').trim();
        if (bookId && !firstBookById.has(bookId)) {
            firstBookById.set(bookId, book);
        }
    }
    const entries = [];
    for (const [position, bookId] of orderedBookIds.entries()) {
        const book = firstBookById.get(bookId);
        entries.push({
            bookId,
            title: String(book?.title || '').trim(),
            author: String(book?.author || '').trim(),
            readerUrl: String(readerUrls[position] || '').trim(),
        });
    }
    return entries;
}
/**
 * Internal: navigate to the shelf page, read the `wr_vid` cookie and poll the
 * localStorage shelf cache for that vid. The vid is returned next to the
 * snapshot so callers like resolveShelfReader can reuse it without a second
 * getCookies call.
 *
 * @param {{goto: Function, getCookies: Function, evaluate: Function}} page - Browser page handle.
 * @returns {Promise<{snapshot: {cacheFound: boolean, rawBooks: unknown[], shelfIndexes: unknown[]}, currentVid: string}>}
 *   An empty snapshot with `currentVid: ''` when no vid cookie is present.
 */
async function loadWebShelfSnapshotWithVid(page) {
    await page.goto(WEREAD_SHELF_URL);
    const cookies = await page.getCookies({ domain: WEREAD_DOMAIN });
    const currentVid = getCurrentVid(cookies);
    if (currentVid) {
        const storageKeys = getWebShelfStorageKeys(currentVid);
        // First pass does not wait for trusted indexes (requireTrustedIndexes = false)
        const pollScript = buildShelfSnapshotPollScript(storageKeys, false);
        const polled = await page.evaluate(pollScript);
        return { snapshot: normalizeWebShelfSnapshot(polled), currentVid };
    }
    // Without a vid the storage keys cannot be built, so report "no cache"
    const emptySnapshot = { cacheFound: false, rawBooks: [], shelfIndexes: [] };
    return { snapshot: emptySnapshot, currentVid: '' };
}
/**
 * Read the structured shelf cache from the WeRead shelf page.
 * The page hydrates localStorage asynchronously, so the underlying loader polls
 * briefly before giving up and reporting the cache as unavailable for the
 * current session.
 *
 * @param {object} page - Browser page handle, forwarded to loadWebShelfSnapshotWithVid.
 * @returns {Promise<{cacheFound: boolean, rawBooks: unknown[], shelfIndexes: unknown[]}>} The shelf snapshot.
 */
export async function loadWebShelfSnapshot(page) {
    const loaded = await loadWebShelfSnapshotWithVid(page);
    return loaded.snapshot;
}
/**
 * `book` needs a trustworthy `bookId -> readerUrl` mapping, which may lag behind
 * the first rawBooks cache hydration. The fast shelf fallback path stays
 * separate; this helper performs one additional bounded poll, and only when
 * resolving reader URLs actually needs it.
 *
 * @param {{evaluate: Function}} page - Browser page handle used to re-run the poll script.
 * @param {{cacheFound: boolean, rawBooks: unknown[], shelfIndexes: unknown[]}} snapshot - Snapshot from the first poll.
 * @param {string} currentVid - Vid used to derive the storage keys; '' disables re-polling.
 * @returns {Promise<object>} The given snapshot when there is nothing to wait for,
 *   otherwise the normalized result of a poll that requires trusted indexes.
 */
async function waitForTrustedWebShelfSnapshot(page, snapshot, currentVid) {
    // Nothing to wait for when: the cache is unavailable, the indexes already
    // fully cover rawBooks, or there is no vid to build storage keys from.
    const nothingToWaitFor = !snapshot.cacheFound
        || getTrustedIndexedBookIds(snapshot).length > 0
        || !currentVid;
    if (nothingToWaitFor) {
        return snapshot;
    }
    const storageKeys = getWebShelfStorageKeys(currentVid);
    const pollScript = buildShelfSnapshotPollScript(storageKeys, true);
    const polled = await page.evaluate(pollScript);
    return normalizeWebShelfSnapshot(polled);
}
/**
 * Resolve a shelf bookId to the current web reader URL by pairing structured
 * shelf cache order with the visible shelf links rendered on the page.
 *
 * @param {object} page - Browser page handle, forwarded to resolveShelfReader.
 * @param {string} bookId - Shelf bookId to look up.
 * @returns {Promise<string|null>} The `readerUrl` reported by resolveShelfReader.
 */
export async function resolveShelfReaderUrl(page, bookId) {
    const { readerUrl } = await resolveShelfReader(page, bookId);
    return readerUrl;
}
/**
 * Resolve the current reader URL for a shelf entry and return the parsed shelf
 * snapshot used during resolution, so callers can reuse cached title/author
 * metadata without loading the shelf page twice.
 *
 * Resolution gives up (readerUrl: null) when the cache is missing, when the
 * shelf order cannot be established, or when the number of rendered shelf
 * links differs from the number of expected entries.
 *
 * @param {{evaluate: Function}} page - Browser page handle.
 * @param {string} bookId - Shelf bookId to look up (compared with `===`).
 * @returns {Promise<{snapshot: object, readerUrl: string|null}>} Snapshot plus the matched URL, if any.
 */
export async function resolveShelfReader(page, bookId) {
    const loaded = await loadWebShelfSnapshotWithVid(page);
    const snapshot = await waitForTrustedWebShelfSnapshot(page, loaded.snapshot, loaded.currentVid);
    const unresolved = { snapshot, readerUrl: null };
    if (!snapshot.cacheFound) {
        return unresolved;
    }
    const rawBookIds = getUniqueRawBookIds(snapshot);
    const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot);
    const hasTrustedOrder = trustedIndexedBookIds.length > 0;
    if (!hasTrustedOrder) {
        // rawBooks order is only an acceptable fallback when there are books
        // and no shelfIndexes at all; a partial index means order is unknown.
        const canUseRawOrderFallback = rawBookIds.length > 0 && snapshot.shelfIndexes.length === 0;
        if (!canUseRawOrderFallback) {
            return unresolved;
        }
    }
    const readerUrlScript = `
    (() => Array.from(document.querySelectorAll('a.shelfBook[href]'))
      .map((anchor) => {
        const href = anchor.getAttribute('href') || '';
        return href ? new URL(href, location.origin).toString() : '';
      })
      .filter(Boolean))
  `;
    const readerUrls = await page.evaluate(readerUrlScript);
    const expectedEntryCount = hasTrustedOrder ? trustedIndexedBookIds.length : rawBookIds.length;
    // URLs are paired with entries by position, so the counts must match exactly
    if (readerUrls.length !== expectedEntryCount) {
        return unresolved;
    }
    const match = buildWebShelfEntries(snapshot, readerUrls)
        .find((candidate) => candidate.bookId === bookId);
    return {
        snapshot,
        readerUrl: match?.readerUrl || null,
    };
}
/**
 * Format a Unix timestamp (seconds) as `YYYY-MM-DD` in UTC+8.
 *
 * @param {number} ts - Unix timestamp in seconds.
 * @returns {string} The calendar date in UTC+8, or '-' when `ts` is not a finite
 *   positive number or lies outside the range a Date can represent.
 */
export function formatDate(ts) {
    if (!Number.isFinite(ts) || ts <= 0) {
        return '-';
    }
    // WeRead timestamps are China-centric; offset to UTC+8 to avoid off-by-one near midnight
    const shifted = new Date(ts * 1000 + 8 * 3600_000);
    // A finite but huge timestamp yields an Invalid Date, on which toISOString()
    // would throw a RangeError; treat it as invalid input instead.
    if (Number.isNaN(shifted.getTime())) {
        return '-';
    }
    // Assemble from UTC components rather than slicing toISOString(): for years
    // above 9999 the ISO form switches to "+YYYYYY-…" and a fixed slice truncates it.
    const year = String(shifted.getUTCFullYear()).padStart(4, '0');
    const month = String(shifted.getUTCMonth() + 1).padStart(2, '0');
    const day = String(shifted.getUTCDate()).padStart(2, '0');
    return `${year}-${month}-${day}`;
}