book.js
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CliError } from '@jackwener/opencli/errors';
import { fetchPrivateApi, fetchWebApi, resolveShelfReader, WEREAD_UA, WEREAD_WEB_ORIGIN, } from './utils.js';

/** Named HTML entities decoded by {@link decodeHtmlText}. */
const NAMED_HTML_ENTITIES = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    ['lt', '<'],
    ['gt', '>'],
]);

/**
 * Strip HTML tags from a markup fragment and decode its character entities.
 *
 * Entities are decoded in a single pass so already-decoded output is never
 * decoded a second time (e.g. `&amp;quot;` yields `&quot;`, not `"`).
 * Unknown named entities and out-of-range numeric entities are left as-is.
 *
 * @param {string} value - Raw HTML fragment.
 * @returns {string} Trimmed plain text.
 */
function decodeHtmlText(value) {
    return value
        .replace(/<[^>]+>/g, '')
        .replace(/&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi, (match, hex, dec, name) => {
            if (name !== undefined) {
                return NAMED_HTML_ENTITIES.get(name) ?? match;
            }
            const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number(dec);
            // String.fromCodePoint throws above U+10FFFF; keep such entities verbatim.
            return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
        })
        // Non-breaking spaces (literal or from &#160;) collapse to a plain space.
        .replace(/\u00a0/g, ' ')
        .trim();
}

/**
 * Collapse every whitespace run to a single space and trim the ends.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeSearchText(value) {
    return value.replace(/\s+/g, ' ').trim();
}

/**
 * Build a comparison key from a normalized title and author, joined by a NUL
 * character so the two parts cannot run into each other.
 *
 * @param {string} title
 * @param {string} author
 * @returns {string}
 */
function buildSearchIdentity(title, author) {
    return `${normalizeSearchText(title)}\u0000${normalizeSearchText(author)}`;
}

/**
 * Count how many entries share each normalized title. Entries whose title is
 * empty after normalization are ignored.
 *
 * @param {Array<{title: string}>} entries
 * @returns {Map<string, number>} Normalized title -> occurrence count.
 */
function countSearchTitles(entries) {
    const counts = new Map();
    for (const entry of entries) {
        const key = normalizeSearchText(entry.title);
        if (!key) {
            continue;
        }
        counts.set(key, (counts.get(key) || 0) + 1);
    }
    return counts;
}

/**
 * Count how many entries share each title+author identity. Entries missing
 * either a title or an author (after normalization) are ignored.
 *
 * @param {Array<{title: string, author: string}>} entries
 * @returns {Map<string, number>} Identity key -> occurrence count.
 */
function countSearchIdentities(entries) {
    const counts = new Map();
    for (const entry of entries) {
        if (!normalizeSearchText(entry.title) || !normalizeSearchText(entry.author)) {
            continue;
        }
        const key = buildSearchIdentity(entry.title, entry.author);
        counts.set(key, (counts.get(key) || 0) + 1);
    }
    return counts;
}

/**
 * Extract the book title from a WeRead document title.
 *
 * @param {string} rawTitle - The page's document title.
 * @returns {string} The title, or '' when the document title is not in the
 *   strict "<title> - 微信读书" shape.
 */
export function strictTitleFromWereadDocumentTitle(rawTitle) {
    const suffix = ' - 微信读书';
    const normalized = String(rawTitle || '').trim();
    if (!normalized.endsWith(suffix)) {
        return '';
    }
    const base = normalized.slice(0, -suffix.length).trim();
    // Only accept the title when WeRead exposes the strict "<title> - 微信读书"
    // shape. If extra separators remain, the page title is ambiguous.
    return base.includes(' - ') ? '' : base;
}

/**
 * Read book metadata from a reader page document.
 *
 * This function is serialized with `toString()` and evaluated inside the page
 * (see `loadReaderFallbackResult`), so it must stay self-contained: it may not
 * reference anything from module scope, which is why the document-title
 * parsing is inlined here rather than delegated to
 * `strictTitleFromWereadDocumentTitle`.
 *
 * @param {Document} doc - Document (or compatible object) to inspect.
 * @returns {{title: string, author: string, publisher: string, intro: string,
 *   category: string, rating: string, metadataReady: boolean}}
 *   `metadataReady` is true when a title or an author was found.
 */
export function extractReaderFallbackMetadata(doc) {
    const text = (node) => node?.textContent?.trim() || '';
    // Text of the first selector that yields non-empty content, else ''.
    const firstText = (...selectors) => {
        for (const selector of selectors) {
            const value = text(doc.querySelector(selector));
            if (value) {
                return value;
            }
        }
        return '';
    };
    // Same rule as strictTitleFromWereadDocumentTitle, inlined so it is
    // available when this function runs in the page context.
    const titleFromDocumentTitle = (rawTitle) => {
        const suffix = ' - 微信读书';
        const normalized = String(rawTitle || '').trim();
        if (!normalized.endsWith(suffix)) {
            return '';
        }
        const base = normalized.slice(0, -suffix.length).trim();
        return base.includes(' - ') ? '' : base;
    };
    const bodyText = doc.body?.innerText?.replace(/\s+/g, ' ').trim() || '';
    const extractRating = () => {
        const match = bodyText.match(/微信读书推荐值\s*([0-9.]+%)/);
        return match ? match[1] : '';
    };
    const extractPublisher = () => {
        const direct = text(doc.querySelector('.introDialog_content_pub_line'));
        return direct.startsWith('出版社') ? direct.replace(/^出版社\s*/, '').trim() : '';
    };
    const extractIntro = () => firstText(
        '.horizontalReaderCoverPage_content_bookInfo_intro',
        '.wr_flyleaf_page_bookIntro_content',
        '.introDialog_content_intro_para',
    );
    // The category is read from the first inline script mentioning "category".
    const categorySource = Array.from(doc.scripts || [])
        .map((script) => script.textContent || '')
        .find((scriptText) => scriptText.includes('"category"')) || '';
    const categoryMatch = categorySource.match(/"category"\s*:\s*"([^"]+)"/);
    const title = firstText(
        '.horizontalReaderCoverPage_content_bookTitle',
        '.wr_flyleaf_page_bookInfo_bookTitle',
        '.outline_book_detail_header_title',
        '.readerTopBar_title_link',
    ) || titleFromDocumentTitle(doc.title || '');
    const author = firstText(
        '.horizontalReaderCoverPage_content_author',
        '.wr_flyleaf_page_bookInfo_author',
        '.outline_book_detail_header_author',
    );
    return {
        title,
        author,
        publisher: extractPublisher(),
        intro: extractIntro(),
        category: categoryMatch ? categoryMatch[1].trim() : '',
        rating: extractRating(),
        metadataReady: Boolean(title || author),
    };
}

/**
 * Fetch the public search results page for a keyword and parse its book list
 * items into title/author/url entries.
 *
 * @param {string} keyword - Normalized search keyword.
 * @returns {Promise<Array<{title: string, author: string, url: string}>>}
 *   Entries that have both a title and a URL; [] when the response is not OK.
 */
async function fetchSearchHtmlEntries(keyword) {
    const url = new URL('/web/search/books', WEREAD_WEB_ORIGIN);
    url.searchParams.set('keyword', keyword);
    const resp = await fetch(url.toString(), {
        headers: { 'User-Agent': WEREAD_UA },
    });
    if (!resp.ok) {
        return [];
    }
    const html = await resp.text();
    const items = Array.from(html.matchAll(/<li[^>]*class="wr_bookList_item"[^>]*>([\s\S]*?)<\/li>/g));
    return items
        .map((match) => {
            const chunk = match[1];
            // The link's href may appear before or after its class attribute.
            const hrefMatch = chunk.match(/<a[^>]*href="([^"]+)"[^>]*class="wr_bookList_item_link"[^>]*>|<a[^>]*class="wr_bookList_item_link"[^>]*href="([^"]+)"[^>]*>/);
            const titleMatch = chunk.match(/<p[^>]*class="wr_bookList_item_title"[^>]*>([\s\S]*?)<\/p>/);
            const authorMatch = chunk.match(/<p[^>]*class="wr_bookList_item_author"[^>]*>([\s\S]*?)<\/p>/);
            const href = hrefMatch?.[1] || hrefMatch?.[2] || '';
            return {
                title: decodeHtmlText(titleMatch?.[1] || ''),
                author: decodeHtmlText(authorMatch?.[1] || ''),
                url: href ? new URL(href, WEREAD_WEB_ORIGIN).toString() : '',
            };
        })
        .filter((entry) => entry.title && entry.url);
}

/**
 * Reuse the public search page as a last-resort reader URL source when the
 * cached shelf page cannot provide a trustworthy bookId-to-reader mapping.
 *
 * A URL is returned only when the match is unambiguous in BOTH the search API
 * response and the HTML results: first by exact title+author identity, then,
 * if no authored same-title HTML entry contradicts it, by unique title.
 *
 * @param {string} title
 * @param {string} author
 * @returns {Promise<string>} The reader URL, or '' when nothing unambiguous is
 *   found or any step fails.
 */
async function resolveSearchReaderUrl(title, author) {
    const normalizedTitle = normalizeSearchText(title);
    const normalizedAuthor = normalizeSearchText(author);
    if (!normalizedTitle) {
        return '';
    }
    try {
        const [data, htmlEntries] = await Promise.all([
            fetchWebApi('/search/global', { keyword: normalizedTitle }),
            fetchSearchHtmlEntries(normalizedTitle),
        ]);
        const books = Array.isArray(data?.books) ? data.books : [];
        const apiIdentityCounts = countSearchIdentities(books.map((item) => ({
            title: item.bookInfo?.title ?? '',
            author: item.bookInfo?.author ?? '',
        })));
        const htmlIdentityCounts = countSearchIdentities(htmlEntries.filter((entry) => entry.author));
        const identityKey = buildSearchIdentity(normalizedTitle, normalizedAuthor);
        if (normalizedAuthor &&
            (apiIdentityCounts.get(identityKey) || 0) === 1 &&
            (htmlIdentityCounts.get(identityKey) || 0) === 1) {
            const exactMatch = htmlEntries.find((entry) => buildSearchIdentity(entry.title, entry.author) === identityKey);
            if (exactMatch?.url) {
                return exactMatch.url;
            }
        }
        const sameTitleHtmlEntries = htmlEntries.filter((entry) => normalizeSearchText(entry.title) === normalizedTitle);
        // With a known author, a same-title entry that names an author but did
        // not match exactly above is not trusted.
        if (normalizedAuthor && sameTitleHtmlEntries.some((entry) => normalizeSearchText(entry.author))) {
            return '';
        }
        const apiTitleCounts = countSearchTitles(books.map((item) => ({ title: item.bookInfo?.title ?? '' })));
        const htmlTitleCounts = countSearchTitles(htmlEntries);
        if ((apiTitleCounts.get(normalizedTitle) || 0) !== 1 || (htmlTitleCounts.get(normalizedTitle) || 0) !== 1) {
            return '';
        }
        return sameTitleHtmlEntries[0]?.url || '';
    }
    catch {
        // Deliberately best-effort: any failure means "no search-derived URL",
        // and the caller falls back to rethrowing its original error.
        return '';
    }
}

/**
 * Read visible book metadata from the web reader cover/flyleaf page.
 * This path is used as a fallback when the private API session has expired.
 *
 * @param {object} page - Browser page handle supplied by the CLI runtime.
 * @param {string} readerUrl - Reader page URL to open.
 * @returns {Promise<{title: string, author: string, publisher: string,
 *   intro: string, category: string, rating: string, metadataReady: boolean}>}
 *   Every string field is coerced to a trimmed string.
 */
async function loadReaderFallbackResult(page, readerUrl) {
    await page.goto(readerUrl);
    await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle, .readerTopBar_title_link', timeout: 10 });
    // The extractor is shipped to the page as source text, so it must not
    // depend on anything outside its own body.
    const result = await page.evaluate(`
    (${extractReaderFallbackMetadata.toString()})(document)
  `);
    const asText = (value) => String(value || '').trim();
    return {
        title: asText(result?.title),
        author: asText(result?.author),
        publisher: asText(result?.publisher),
        intro: asText(result?.intro),
        category: asText(result?.category),
        rating: asText(result?.rating),
        metadataReady: result?.metadataReady === true,
    };
}

cli({
    site: 'weread',
    name: 'book',
    description: 'View book details on WeRead',
    domain: 'weread.qq.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'book-id', positional: true, required: true, help: 'Book ID from search or shelf results' },
    ],
    columns: ['title', 'author', 'publisher', 'intro', 'category', 'rating'],
    /**
     * Look up a book through the private API; on an AUTH_REQUIRED CliError,
     * fall back to scraping the web reader page. Any other error, or a
     * fallback that finds no reader URL / no title, rethrows the original error.
     */
    func: async (page, args) => {
        const bookId = String(args['book-id'] || '').trim();
        try {
            const data = await fetchPrivateApi(page, '/book/info', { bookId });
            // newRating is 0-1000 scale per community docs; needs runtime verification
            const rating = data.newRating ? `${(data.newRating / 10).toFixed(1)}%` : '-';
            return [{
                    title: data.title ?? '',
                    author: data.author ?? '',
                    publisher: data.publisher ?? '',
                    intro: data.intro ?? '',
                    category: data.category ?? '',
                    rating,
                }];
        }
        catch (error) {
            if (!(error instanceof CliError) || error.code !== 'AUTH_REQUIRED') {
                throw error;
            }
            const { readerUrl: resolvedReaderUrl, snapshot } = await resolveShelfReader(page, bookId);
            let readerUrl = resolvedReaderUrl;
            if (!readerUrl) {
                // Guard the lookup so a missing snapshot surfaces the original
                // auth error below instead of an unrelated TypeError.
                const rawBooks = Array.isArray(snapshot?.rawBooks) ? snapshot.rawBooks : [];
                const cachedBook = rawBooks.find((book) => String(book?.bookId || '').trim() === bookId);
                readerUrl = await resolveSearchReaderUrl(String(cachedBook?.title || ''), String(cachedBook?.author || ''));
            }
            if (!readerUrl) {
                throw error;
            }
            const data = await loadReaderFallbackResult(page, readerUrl);
            if (!data.metadataReady || !data.title) {
                throw error;
            }
            return [{
                    title: data.title,
                    author: data.author,
                    publisher: data.publisher,
                    intro: data.intro,
                    category: data.category,
                    rating: data.rating,
                }];
        }
    },
});