Cradicle Explorer

book.js
  1  import { cli, Strategy } from '@jackwener/opencli/registry';
  2  import { CliError } from '@jackwener/opencli/errors';
  3  import { fetchPrivateApi, fetchWebApi, resolveShelfReader, WEREAD_UA, WEREAD_WEB_ORIGIN, } from './utils.js';
  4  function decodeHtmlText(value) {
  5      return value
  6          .replace(/<[^>]+>/g, '')
  7          .replace(/&#x([0-9a-fA-F]+);/gi, (_, n) => String.fromCharCode(parseInt(n, 16)))
  8          .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n)))
  9          .replace(/&nbsp;/g, ' ')
 10          .replace(/&amp;/g, '&')
 11          .replace(/&quot;/g, '"')
 12          .trim();
 13  }
 14  function normalizeSearchText(value) {
 15      return value.replace(/\s+/g, ' ').trim();
 16  }
 17  function buildSearchIdentity(title, author) {
 18      return `${normalizeSearchText(title)}\u0000${normalizeSearchText(author)}`;
 19  }
 20  function countSearchTitles(entries) {
 21      const counts = new Map();
 22      for (const entry of entries) {
 23          const key = normalizeSearchText(entry.title);
 24          if (!key)
 25              continue;
 26          counts.set(key, (counts.get(key) || 0) + 1);
 27      }
 28      return counts;
 29  }
 30  function countSearchIdentities(entries) {
 31      const counts = new Map();
 32      for (const entry of entries) {
 33          const key = buildSearchIdentity(entry.title, entry.author);
 34          if (!normalizeSearchText(entry.title) || !normalizeSearchText(entry.author))
 35              continue;
 36          counts.set(key, (counts.get(key) || 0) + 1);
 37      }
 38      return counts;
 39  }
 40  export function strictTitleFromWereadDocumentTitle(rawTitle) {
 41      const suffix = ' - 微信读书';
 42      const normalized = String(rawTitle || '').trim();
 43      if (!normalized.endsWith(suffix))
 44          return '';
 45      const base = normalized.slice(0, -suffix.length).trim();
 46      // Only accept the title when WeRead exposes the strict "<title> - 微信读书"
 47      // shape. If extra separators remain, the page title is ambiguous.
 48      return base.includes(' - ') ? '' : base;
 49  }
 50  export function extractReaderFallbackMetadata(doc) {
 51      const text = (node) => node?.textContent?.trim() || '';
 52      const firstText = (...sels) => { for (const s of sels) {
 53          const v = text(doc.querySelector(s));
 54          if (v)
 55              return v;
 56      } return ''; };
 57      const bodyText = doc.body?.innerText?.replace(/\s+/g, ' ').trim() || '';
 58      const extractRating = () => {
 59          const match = bodyText.match(/微信读书推荐值\s*([0-9.]+%)/);
 60          return match ? match[1] : '';
 61      };
 62      const extractPublisher = () => {
 63          const direct = text(doc.querySelector('.introDialog_content_pub_line'));
 64          return direct.startsWith('出版社') ? direct.replace(/^出版社\s*/, '').trim() : '';
 65      };
 66      const extractIntro = () => {
 67          const selectors = [
 68              '.horizontalReaderCoverPage_content_bookInfo_intro',
 69              '.wr_flyleaf_page_bookIntro_content',
 70              '.introDialog_content_intro_para',
 71          ];
 72          for (const selector of selectors) {
 73              const value = text(doc.querySelector(selector));
 74              if (value)
 75                  return value;
 76          }
 77          return '';
 78      };
 79      const categorySource = Array.from(doc.scripts || [])
 80          .map((script) => script.textContent || '')
 81          .find((scriptText) => scriptText.includes('"category"')) || '';
 82      const categoryMatch = categorySource.match(/"category"\s*:\s*"([^"]+)"/);
 83      const title = firstText('.horizontalReaderCoverPage_content_bookTitle', '.wr_flyleaf_page_bookInfo_bookTitle', '.outline_book_detail_header_title', '.readerTopBar_title_link') || strictTitleFromWereadDocumentTitle(doc.title || '');
 84      const author = firstText('.horizontalReaderCoverPage_content_author', '.wr_flyleaf_page_bookInfo_author', '.outline_book_detail_header_author');
 85      return {
 86          title,
 87          author,
 88          publisher: extractPublisher(),
 89          intro: extractIntro(),
 90          category: categoryMatch ? categoryMatch[1].trim() : '',
 91          rating: extractRating(),
 92          metadataReady: Boolean(title || author),
 93      };
 94  }
 95  /**
 96   * Reuse the public search page as a last-resort reader URL source when the
 97   * cached shelf page cannot provide a trustworthy bookId-to-reader mapping.
 98   */
 99  async function resolveSearchReaderUrl(title, author) {
100      const normalizedTitle = normalizeSearchText(title);
101      const normalizedAuthor = normalizeSearchText(author);
102      if (!normalizedTitle)
103          return '';
104      try {
105          const [data, htmlEntries] = await Promise.all([
106              fetchWebApi('/search/global', { keyword: normalizedTitle }),
107              (async () => {
108                  const url = new URL('/web/search/books', WEREAD_WEB_ORIGIN);
109                  url.searchParams.set('keyword', normalizedTitle);
110                  const resp = await fetch(url.toString(), {
111                      headers: { 'User-Agent': WEREAD_UA },
112                  });
113                  if (!resp.ok)
114                      return [];
115                  const html = await resp.text();
116                  const items = Array.from(html.matchAll(/<li[^>]*class="wr_bookList_item"[^>]*>([\s\S]*?)<\/li>/g));
117                  return items.map((match) => {
118                      const chunk = match[1];
119                      const hrefMatch = chunk.match(/<a[^>]*href="([^"]+)"[^>]*class="wr_bookList_item_link"[^>]*>|<a[^>]*class="wr_bookList_item_link"[^>]*href="([^"]+)"[^>]*>/);
120                      const titleMatch = chunk.match(/<p[^>]*class="wr_bookList_item_title"[^>]*>([\s\S]*?)<\/p>/);
121                      const authorMatch = chunk.match(/<p[^>]*class="wr_bookList_item_author"[^>]*>([\s\S]*?)<\/p>/);
122                      const href = hrefMatch?.[1] || hrefMatch?.[2] || '';
123                      return {
124                          title: decodeHtmlText(titleMatch?.[1] || ''),
125                          author: decodeHtmlText(authorMatch?.[1] || ''),
126                          url: href ? new URL(href, WEREAD_WEB_ORIGIN).toString() : '',
127                      };
128                  }).filter((entry) => entry.title && entry.url);
129              })(),
130          ]);
131          const books = Array.isArray(data?.books) ? data.books : [];
132          const apiIdentityCounts = countSearchIdentities(books.map((item) => ({
133              title: item.bookInfo?.title ?? '',
134              author: item.bookInfo?.author ?? '',
135          })));
136          const htmlIdentityCounts = countSearchIdentities(htmlEntries.filter((entry) => entry.author));
137          const identityKey = buildSearchIdentity(normalizedTitle, normalizedAuthor);
138          if (normalizedAuthor &&
139              (apiIdentityCounts.get(identityKey) || 0) === 1 &&
140              (htmlIdentityCounts.get(identityKey) || 0) === 1) {
141              const exactMatch = htmlEntries.find((entry) => buildSearchIdentity(entry.title, entry.author) === identityKey);
142              if (exactMatch?.url)
143                  return exactMatch.url;
144          }
145          const sameTitleHtmlEntries = htmlEntries.filter((entry) => normalizeSearchText(entry.title) === normalizedTitle);
146          if (normalizedAuthor && sameTitleHtmlEntries.some((entry) => normalizeSearchText(entry.author))) {
147              return '';
148          }
149          const apiTitleCounts = countSearchTitles(books.map((item) => ({ title: item.bookInfo?.title ?? '' })));
150          const htmlTitleCounts = countSearchTitles(htmlEntries);
151          if ((apiTitleCounts.get(normalizedTitle) || 0) !== 1 || (htmlTitleCounts.get(normalizedTitle) || 0) !== 1) {
152              return '';
153          }
154          return htmlEntries.find((entry) => normalizeSearchText(entry.title) === normalizedTitle)?.url || '';
155      }
156      catch {
157          return '';
158      }
159  }
/**
 * Read visible book metadata from the web reader cover/flyleaf page.
 * This path is used as a fallback when the private API session has expired.
 *
 * Navigates `page` to `readerUrl`, waits for one of the title elements, then
 * runs the serialized `extractReaderFallbackMetadata` inside the page and
 * coerces every text field of its result to a trimmed string.
 *
 * @param {object} page - Browser page handle exposing `goto`, `wait` and
 *   `evaluate`.
 * @param {string} readerUrl - Reader URL of the book.
 * @returns {Promise<{title: string, author: string, publisher: string,
 *   intro: string, category: string, rating: string, metadataReady: boolean}>}
 */
async function loadReaderFallbackResult(page, readerUrl) {
    await page.goto(readerUrl);
    await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle, .readerTopBar_title_link', timeout: 10 });
    // The extractor is shipped to the page as source text and invoked there.
    const script = `
    (${extractReaderFallbackMetadata.toString()})(document)
  `;
    const raw = await page.evaluate(script);
    const textFields = ['title', 'author', 'publisher', 'intro', 'category', 'rating'];
    const cleaned = Object.fromEntries(textFields.map((field) => [field, String(raw?.[field] || '').trim()]));
    return {
        ...cleaned,
        metadataReady: raw?.metadataReady === true,
    };
}
// Registers the `weread book <book-id>` command.
//
// Primary path: the private `/book/info` API. When that fails with an
// AUTH_REQUIRED CliError, the command falls back to scraping the web reader
// page, whose URL is resolved from the shelf first and from public search
// second. If the fallback cannot produce a title, the original auth error is
// rethrown so the caller still learns that a login is required.
cli({
    site: 'weread',
    name: 'book',
    description: 'View book details on WeRead',
    domain: 'weread.qq.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'book-id', positional: true, required: true, help: 'Book ID from search or shelf results' },
    ],
    columns: ['title', 'author', 'publisher', 'intro', 'category', 'rating'],
    func: async (page, args) => {
        const bookId = String(args['book-id'] || '').trim();
        let authError;
        try {
            const info = await fetchPrivateApi(page, '/book/info', { bookId });
            // newRating is 0-1000 scale per community docs; needs runtime verification
            let rating = '-';
            if (info.newRating) {
                rating = `${(info.newRating / 10).toFixed(1)}%`;
            }
            return [{
                    title: info.title ?? '',
                    author: info.author ?? '',
                    publisher: info.publisher ?? '',
                    intro: info.intro ?? '',
                    category: info.category ?? '',
                    rating,
                }];
        }
        catch (error) {
            const isAuthFailure = error instanceof CliError && error.code === 'AUTH_REQUIRED';
            if (!isAuthFailure) {
                throw error;
            }
            authError = error;
        }

        // Fallback: locate the reader page for this book.
        const shelfLookup = await resolveShelfReader(page, bookId);
        let readerUrl = shelfLookup.readerUrl;
        if (!readerUrl) {
            // The shelf gave no URL; search by the cached title/author instead.
            const cachedBook = shelfLookup.snapshot.rawBooks.find((book) => String(book?.bookId || '').trim() === bookId);
            readerUrl = await resolveSearchReaderUrl(String(cachedBook?.title || ''), String(cachedBook?.author || ''));
        }
        if (!readerUrl) {
            throw authError;
        }
        const scraped = await loadReaderFallbackResult(page, readerUrl);
        const hasUsableMetadata = scraped.metadataReady && Boolean(scraped.title);
        if (!hasUsableMetadata) {
            throw authError;
        }
        const { title, author, publisher, intro, category, rating } = scraped;
        return [{ title, author, publisher, intro, category, rating }];
    },
});