// utils.js
/**
 * WeRead shared helpers: fetch wrappers and formatting.
 *
 * Two API domains:
 * - WEB_API (weread.qq.com/web/*): public, Node.js fetch
 * - API (i.weread.qq.com/*): private, Node.js fetch with cookies from browser
 */
import { CliError } from '@jackwener/opencli/errors';

export const WEREAD_DOMAIN = 'weread.qq.com';
export const WEREAD_WEB_ORIGIN = `https://${WEREAD_DOMAIN}`;
export const WEREAD_SHELF_URL = `${WEREAD_WEB_ORIGIN}/web/shelf`;
const WEB_API = `${WEREAD_WEB_ORIGIN}/web`;
const API = `https://i.${WEREAD_DOMAIN}`;
export const WEREAD_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
/** `errcode` values that this module reports as "not logged in". */
const WEREAD_AUTH_ERRCODES = new Set([-2010, -2012]);

/**
 * Serialize cookie objects into a `Cookie` request-header value.
 * @param {Array<{name: string, value: string}>} cookies
 * @returns {string} `name=value` pairs joined with `'; '` (empty string for no cookies).
 */
function buildCookieHeader(cookies) {
    return cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join('; ');
}

/**
 * Decide whether a response should be reported as an authentication failure:
 * either HTTP 401 or a JSON body whose `errcode` is in WEREAD_AUTH_ERRCODES.
 * @param {{status: number}} resp
 * @param {any} data Parsed JSON body (may be null/undefined).
 * @returns {boolean}
 */
function isAuthErrorResponse(resp, data) {
    return resp.status === 401 || WEREAD_AUTH_ERRCODES.has(Number(data?.errcode));
}

/**
 * Read the value of the `wr_vid` cookie.
 * @param {Array<{name: string, value: string}>} cookies
 * @returns {string} Trimmed cookie value, or `''` when the cookie is absent or empty.
 */
function getCurrentVid(cookies) {
    return String(cookies.find((cookie) => cookie.name === 'wr_vid')?.value || '').trim();
}

/**
 * Build the localStorage keys under which the shelf cache is looked up for a given vid.
 * @param {string} currentVid
 * @returns {{rawBooksKey: string, shelfIndexesKey: string}}
 */
function getWebShelfStorageKeys(currentVid) {
    return {
        rawBooksKey: `shelf:rawBooks:${currentVid}`,
        shelfIndexesKey: `shelf:shelfIndexes:${currentVid}`,
    };
}

/**
 * Coerce an arbitrary page-evaluation result into a well-formed snapshot so
 * downstream code can rely on `rawBooks` / `shelfIndexes` always being arrays.
 * @param {any} value
 * @returns {{cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}}
 */
function normalizeWebShelfSnapshot(value) {
    return {
        cacheFound: value?.cacheFound === true,
        rawBooks: Array.isArray(value?.rawBooks) ? value.rawBooks : [],
        shelfIndexes: Array.isArray(value?.shelfIndexes) ? value.shelfIndexes : [],
    };
}

/**
 * Normalize a bookId-like value to the trimmed string form used for all
 * comparisons in this module. Falsy values become `''`.
 * @param {any} value
 * @returns {string}
 */
function normalizeBookId(value) {
    return String(value || '').trim();
}

/**
 * Build the source text of a script, meant to be evaluated in the page, that
 * polls localStorage every 100 ms for up to 5 s and resolves with
 * `{ cacheFound, rawBooks, shelfIndexes }`.
 *
 * The script resolves early once `rawBooks` parses to an array and, when
 * `requireTrustedIndexes` is true, once `shelfIndexes` also covers every
 * rawBooks bookId. On timeout it resolves with whatever was last read.
 * @param {{rawBooksKey: string, shelfIndexesKey: string}} storageKeys
 * @param {boolean} requireTrustedIndexes
 * @returns {string} JavaScript source of an IIFE returning a Promise.
 */
function buildShelfSnapshotPollScript(storageKeys, requireTrustedIndexes) {
    // Values are injected with JSON.stringify so they are safely quoted in the script text.
    return `
    (() => new Promise((resolve) => {
      const deadline = Date.now() + 5000;
      const rawBooksKey = ${JSON.stringify(storageKeys.rawBooksKey)};
      const shelfIndexesKey = ${JSON.stringify(storageKeys.shelfIndexesKey)};
      const requireTrustedIndexes = ${JSON.stringify(requireTrustedIndexes)};

      const readJson = (raw) => {
        if (typeof raw !== 'string') return null;
        try {
          return JSON.parse(raw);
        } catch {
          return null;
        }
      };

      const collectBookIds = (items) => Array.isArray(items)
        ? Array.from(new Set(items.map((item) => String(item?.bookId || '').trim()).filter(Boolean)))
        : [];

      // Mirror of getTrustedIndexedBookIds in Node.js — keep in sync
      const hasTrustedIndexes = (rawBooks, shelfIndexes) => {
        const rawBookIds = collectBookIds(rawBooks);
        if (rawBookIds.length === 0) return false;

        const rawBookIdSet = new Set(rawBookIds);
        const projectedIndexedBookIds = Array.isArray(shelfIndexes)
          ? Array.from(new Set(
              shelfIndexes
                .filter((entry) => Number.isFinite(entry?.idx))
                .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER))
                .map((entry) => String(entry?.bookId || '').trim())
                .filter((bookId) => rawBookIdSet.has(bookId)),
            ))
          : [];

        return projectedIndexedBookIds.length === rawBookIds.length;
      };

      const poll = () => {
        const rawBooks = readJson(localStorage.getItem(rawBooksKey));
        const shelfIndexes = readJson(localStorage.getItem(shelfIndexesKey));
        const cacheFound = Array.isArray(rawBooks);
        const ready = cacheFound && (!requireTrustedIndexes || hasTrustedIndexes(rawBooks, shelfIndexes));

        if (ready || Date.now() >= deadline) {
          resolve({
            cacheFound,
            rawBooks: Array.isArray(rawBooks) ? rawBooks : [],
            shelfIndexes: Array.isArray(shelfIndexes) ? shelfIndexes : [],
          });
          return;
        }

        setTimeout(poll, 100);
      };

      poll();
    }))
  `;
}

/**
 * Build `${base}${path}` as a URL and append `params` as query parameters.
 * @param {string} base
 * @param {string} path
 * @param {Record<string, string>} [params]
 * @returns {URL}
 */
function buildApiUrl(base, path, params) {
    const url = new URL(`${base}${path}`);
    if (params) {
        for (const [key, value] of Object.entries(params)) {
            url.searchParams.set(key, value);
        }
    }
    return url;
}

/**
 * Run `fetch`, converting a rejected request (network/DNS/TLS failure) into a
 * CliError so both API wrappers report transport failures the same way.
 * @param {string} urlStr
 * @param {string} path Endpoint path, used only in the error message.
 * @param {object} init `fetch` options.
 * @returns {Promise<Response>}
 * @throws {CliError} FETCH_ERROR when the request itself fails.
 */
async function fetchOrThrow(urlStr, path, init) {
    try {
        return await fetch(urlStr, init);
    }
    catch (error) {
        throw new CliError('FETCH_ERROR', `Failed to fetch ${path}: ${error instanceof Error ? error.message : String(error)}`, 'WeRead API may be temporarily unavailable');
    }
}

/**
 * Parse a response body as JSON.
 * @param {Response} resp
 * @param {string} path Endpoint path, used only in the error message.
 * @returns {Promise<any>}
 * @throws {CliError} PARSE_ERROR when the body is not valid JSON.
 */
async function readJsonOrThrow(resp, path) {
    try {
        return await resp.json();
    }
    catch {
        throw new CliError('PARSE_ERROR', `Invalid JSON response for ${path}`, 'WeRead may have returned an HTML error page');
    }
}

/**
 * Fetch a public WeRead web endpoint (Node.js direct fetch).
 * Used by search and ranking commands (browser: false).
 * @param {string} path Path appended to the `/web` API base.
 * @param {Record<string, string>} [params] Query parameters.
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {CliError} FETCH_ERROR on transport failure or non-2xx status;
 *   PARSE_ERROR when the body is not JSON.
 */
export async function fetchWebApi(path, params) {
    const url = buildApiUrl(WEB_API, path, params);
    const resp = await fetchOrThrow(url.toString(), path, {
        headers: { 'User-Agent': WEREAD_UA },
    });
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    return readJsonOrThrow(resp, path);
}

/**
 * Fetch a private WeRead API endpoint with cookies extracted from the browser.
 * The HTTP request itself runs in Node.js to avoid page-context CORS failures.
 *
 * Cookies are collected from both the API subdomain (i.weread.qq.com) and the
 * main domain (weread.qq.com). WeRead may set auth cookies as host-only on
 * weread.qq.com, which won't match i.weread.qq.com in a URL-based lookup.
 * @param {object} page Browser page handle; only `getCookies` is used here.
 * @param {string} path Path appended to the private API base.
 * @param {Record<string, string>} [params] Query parameters.
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {CliError} FETCH_ERROR (transport failure or non-2xx status),
 *   PARSE_ERROR (non-JSON body), AUTH_REQUIRED (401 or auth errcode),
 *   API_ERROR (any other non-zero errcode).
 */
export async function fetchPrivateApi(page, path, params) {
    const urlStr = buildApiUrl(API, path, params).toString();
    // Merge cookies from both domains; API-domain cookies take precedence on name collision
    const [apiCookies, domainCookies] = await Promise.all([
        page.getCookies({ url: urlStr }),
        page.getCookies({ domain: WEREAD_DOMAIN }),
    ]);
    const merged = new Map();
    for (const cookie of domainCookies) {
        merged.set(cookie.name, cookie);
    }
    for (const cookie of apiCookies) {
        merged.set(cookie.name, cookie);
    }
    const cookieHeader = buildCookieHeader(Array.from(merged.values()));
    const resp = await fetchOrThrow(urlStr, path, {
        headers: {
            'User-Agent': WEREAD_UA,
            'Origin': WEREAD_WEB_ORIGIN,
            'Referer': `${WEREAD_WEB_ORIGIN}/`,
            ...(cookieHeader ? { 'Cookie': cookieHeader } : {}),
        },
    });
    // The body is parsed before the status check because auth failures are
    // detected from the JSON errcode as well as from the HTTP status.
    const data = await readJsonOrThrow(resp, path);
    if (isAuthErrorResponse(resp, data)) {
        throw new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first');
    }
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable');
    }
    if (data?.errcode != null && data.errcode !== 0) {
        throw new CliError('API_ERROR', data.errmsg ?? `WeRead API error ${data.errcode}`);
    }
    return data;
}

/**
 * Collect the distinct, non-empty bookIds of `snapshot.rawBooks` in first-seen order.
 * @param {{rawBooks: any[]}} snapshot
 * @returns {string[]}
 */
function getUniqueRawBookIds(snapshot) {
    return Array.from(new Set(snapshot.rawBooks
        .map((book) => normalizeBookId(book?.bookId))
        .filter(Boolean)));
}

/**
 * Mirror of hasTrustedIndexes in buildShelfSnapshotPollScript — keep in sync
 *
 * Returns the rawBooks bookIds ordered by `shelfIndexes[].idx`, but only when
 * the indexes cover every rawBooks bookId; otherwise returns `[]`.
 * @param {{rawBooks: any[], shelfIndexes: any[]}} snapshot
 * @returns {string[]}
 */
function getTrustedIndexedBookIds(snapshot) {
    const rawBookIds = getUniqueRawBookIds(snapshot);
    if (rawBookIds.length === 0) {
        return [];
    }
    const rawBookIdSet = new Set(rawBookIds);
    const projectedIndexedBookIds = Array.from(new Set(snapshot.shelfIndexes
        // filter() returns a fresh array, so the sort below never mutates the snapshot
        .filter((entry) => Number.isFinite(entry?.idx))
        .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER))
        .map((entry) => normalizeBookId(entry?.bookId))
        .filter((bookId) => rawBookIdSet.has(bookId))));
    return projectedIndexedBookIds.length === rawBookIds.length ? projectedIndexedBookIds : [];
}

/**
 * Build stable shelf records from the web cache plus optional rendered reader URLs.
 * We only trust shelfIndexes when it fully covers the same bookId set as rawBooks;
 * otherwise we keep rawBooks order to avoid partial hydration reordering entries.
 * @param {{rawBooks: any[], shelfIndexes: any[]}} snapshot
 * @param {string[]} [readerUrls] URLs paired with entries by position.
 * @returns {Array<{bookId: string, title: string, author: string, readerUrl: string}>}
 */
export function buildWebShelfEntries(snapshot, readerUrls = []) {
    const rawBookIds = getUniqueRawBookIds(snapshot);
    const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot);
    const orderedBookIds = trustedIndexedBookIds.length > 0 ? trustedIndexedBookIds : rawBookIds;
    // The first occurrence of a bookId wins, matching getUniqueRawBookIds' first-seen order
    const rawBookById = new Map();
    for (const book of snapshot.rawBooks) {
        const bookId = normalizeBookId(book?.bookId);
        if (!bookId || rawBookById.has(bookId)) {
            continue;
        }
        rawBookById.set(bookId, book);
    }
    return orderedBookIds.map((bookId, index) => {
        const book = rawBookById.get(bookId);
        return {
            bookId,
            title: String(book?.title || '').trim(),
            author: String(book?.author || '').trim(),
            readerUrl: String(readerUrls[index] || '').trim(),
        };
    });
}

/**
 * Internal: load shelf snapshot and return the currentVid alongside it,
 * so callers like resolveShelfReaderUrl can reuse it without a second getCookies.
 * @param {object} page Browser page handle; `goto`, `getCookies` and `evaluate` are used.
 * @returns {Promise<{snapshot: {cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}, currentVid: string}>}
 */
async function loadWebShelfSnapshotWithVid(page) {
    await page.goto(WEREAD_SHELF_URL);
    const cookies = await page.getCookies({ domain: WEREAD_DOMAIN });
    const currentVid = getCurrentVid(cookies);
    if (!currentVid) {
        // Without a vid the storage keys cannot be built, so report an empty cache
        return { snapshot: { cacheFound: false, rawBooks: [], shelfIndexes: [] }, currentVid: '' };
    }
    const result = await page.evaluate(buildShelfSnapshotPollScript(getWebShelfStorageKeys(currentVid), false));
    return {
        snapshot: normalizeWebShelfSnapshot(result),
        currentVid,
    };
}

/**
 * Read the structured shelf cache from the WeRead shelf page.
 * The page hydrates localStorage asynchronously, so we poll briefly before
 * giving up and treating the cache as unavailable for the current session.
 * @param {object} page Browser page handle.
 * @returns {Promise<{cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}>}
 */
export async function loadWebShelfSnapshot(page) {
    const { snapshot } = await loadWebShelfSnapshotWithVid(page);
    return snapshot;
}

/**
 * `book` needs a trustworthy `bookId -> readerUrl` mapping, which may lag behind
 * the first rawBooks cache hydration. Keep the fast shelf fallback path separate
 * and only wait here, with a bounded poll, when resolving reader URLs.
 * @param {object} page Browser page handle; only `evaluate` is used.
 * @param {{cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}} snapshot
 * @param {string} currentVid
 * @returns {Promise<{cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}>}
 *   The given snapshot when no re-poll is needed, otherwise a freshly polled one.
 */
async function waitForTrustedWebShelfSnapshot(page, snapshot, currentVid) {
    // Cache not available; nothing to wait for
    if (!snapshot.cacheFound) {
        return snapshot;
    }
    // Indexes already fully cover rawBooks; no need to re-poll
    if (getTrustedIndexedBookIds(snapshot).length > 0) {
        return snapshot;
    }
    if (!currentVid) {
        return snapshot;
    }
    const result = await page.evaluate(buildShelfSnapshotPollScript(getWebShelfStorageKeys(currentVid), true));
    return normalizeWebShelfSnapshot(result);
}

/**
 * Resolve a shelf bookId to the current web reader URL by pairing structured
 * shelf cache order with the visible shelf links rendered on the page.
 * @param {object} page Browser page handle.
 * @param {string} bookId
 * @returns {Promise<string|null>} Reader URL, or null when it cannot be resolved.
 */
export async function resolveShelfReaderUrl(page, bookId) {
    const resolution = await resolveShelfReader(page, bookId);
    return resolution.readerUrl;
}

/**
 * Resolve the current reader URL for a shelf entry and return the parsed shelf
 * snapshot used during resolution, so callers can reuse cached title/author
 * metadata without loading the shelf page twice.
 * @param {object} page Browser page handle.
 * @param {string} bookId Compared after the same string/trim normalization applied to shelf ids.
 * @returns {Promise<{snapshot: {cacheFound: boolean, rawBooks: any[], shelfIndexes: any[]}, readerUrl: string|null}>}
 */
export async function resolveShelfReader(page, bookId) {
    const { snapshot: initialSnapshot, currentVid } = await loadWebShelfSnapshotWithVid(page);
    const snapshot = await waitForTrustedWebShelfSnapshot(page, initialSnapshot, currentVid);
    if (!snapshot.cacheFound) {
        return { snapshot, readerUrl: null };
    }
    const rawBookIds = getUniqueRawBookIds(snapshot);
    const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot);
    // rawBooks order is only used when there are no indexes at all; partially
    // populated indexes mean the order is unknown, so resolution is refused.
    const canUseRawOrderFallback = trustedIndexedBookIds.length === 0
        && rawBookIds.length > 0
        && snapshot.shelfIndexes.length === 0;
    if (trustedIndexedBookIds.length === 0 && !canUseRawOrderFallback) {
        return { snapshot, readerUrl: null };
    }
    const evaluated = await page.evaluate(`
    (() => Array.from(document.querySelectorAll('a.shelfBook[href]'))
      .map((anchor) => {
        const href = anchor.getAttribute('href') || '';
        return href ? new URL(href, location.origin).toString() : '';
      })
      .filter(Boolean))
  `);
    // Guard against a non-array evaluation result instead of throwing on `.length`
    const readerUrls = Array.isArray(evaluated) ? evaluated : [];
    const expectedEntryCount = trustedIndexedBookIds.length > 0 ? trustedIndexedBookIds.length : rawBookIds.length;
    // URLs are paired with entries by position, so a count mismatch makes the pairing unreliable
    if (readerUrls.length !== expectedEntryCount) {
        return { snapshot, readerUrl: null };
    }
    const entries = buildWebShelfEntries(snapshot, readerUrls);
    // Entry ids are trimmed strings; normalize the requested id so numeric or padded ids still match
    const targetBookId = String(bookId ?? '').trim();
    const entry = entries.find((candidate) => candidate.bookId === targetBookId);
    return {
        snapshot,
        readerUrl: entry?.readerUrl || null,
    };
}

/**
 * Format a Unix timestamp (seconds) to YYYY-MM-DD in UTC+8. Returns '-' for invalid input.
 * Invalid input includes non-finite or non-positive values and timestamps that
 * fall outside the range representable as a four-digit-year date.
 * @param {number} ts
 * @returns {string}
 */
export function formatDate(ts) {
    if (!Number.isFinite(ts) || ts <= 0) {
        return '-';
    }
    // WeRead timestamps are China-centric; offset to UTC+8 to avoid off-by-one near midnight
    const d = new Date(ts * 1000 + 8 * 3600_000);
    const year = d.getUTCFullYear();
    // An out-of-range Date makes toISOString() throw, and years above 9999 render
    // as "+0YYYYY-…", which slice(0, 10) would cut into a malformed string.
    if (Number.isNaN(year) || year > 9999) {
        return '-';
    }
    return d.toISOString().slice(0, 10);
}