// utils.test.js — Vitest coverage for the Douban helpers exported by ./utils.js.
import vm from 'node:vm';
import { describe, expect, it, vi } from 'vitest';
import {
  getDoubanPhotoExtension,
  inferDoubanSearchResultType,
  loadDoubanSubjectDetail,
  loadDoubanSubjectPhotos,
  normalizeDoubanBookSubject,
  normalizeDoubanSubjectId,
  promoteDoubanPhotoUrl,
  resolveDoubanPhotoAssetUrl,
  searchDouban,
} from './utils.js';

// Selector strings the fakes below recognise. They are matched by exact string
// equality, so they must stay in sync with whatever the evaluated search script
// passes to querySelector / querySelectorAll.
const ITEM_TITLE_SELECTOR = '.title-text, .title a, .title h3 a, h3 a, a[title]';
const ITEM_RATING_SELECTOR = '.rating_nums';
const ITEM_ABSTRACT_SELECTOR = '.meta.abstract, .meta, .abstract, .subject-abstract, p';
const ITEM_SUBJECT_LINK_SELECTOR = 'a[href*="/subject/"]';
const ITEM_COVER_SELECTOR = 'img';

// Document-level selectors that resolve to the first item's title node.
const DOCUMENT_FIRST_TITLE_SELECTORS = new Set([
  '.item-root .title-text, .item-root .title a',
  '.item-root .title-text, .item-root .title a, .result-list .result-item h3 a',
]);

// Document-level selectors that resolve to the full list of fake result items.
const DOCUMENT_ITEM_LIST_SELECTORS = new Set([
  '.item-root',
  '.item-root, .result-list .result-item',
]);

/**
 * Builds a minimal element stand-in exposing only `textContent` and
 * `getAttribute`.
 *
 * @param {string} [text=''] - Value exposed as `textContent`.
 * @param {Object<string, string>} [attrs={}] - Attribute name → value map.
 * @returns {{textContent: string, getAttribute: function(string): string}}
 *   A fake node; missing (or falsy) attributes read back as `''`.
 */
function createFakeNode(text = '', attrs = {}) {
  return {
    textContent: text,
    getAttribute(name) {
      return attrs[name] || '';
    },
  };
}

/**
 * Builds a fake search-result element whose `querySelector` answers the fixed
 * set of selectors listed at the top of this file and returns `null` for
 * anything else.
 *
 * @param {Object} fields
 * @param {string} fields.title - Text and `title` attribute of the title node.
 * @param {string} fields.url - `href` of the title node and subject link.
 * @param {string} fields.rating - Text of the rating node.
 * @param {string} fields.abstract - Text of the abstract node.
 * @param {string} fields.cover - `src` of the cover image node.
 * @returns {{querySelector: function(string): (Object|null)}} Fake item element.
 */
function createFakeSearchItem({ title, url, rating, abstract, cover }) {
  return {
    querySelector(selector) {
      if (selector === ITEM_TITLE_SELECTOR) {
        return createFakeNode(title, { href: url, title });
      }
      if (selector === ITEM_RATING_SELECTOR) {
        return createFakeNode(rating);
      }
      if (selector === ITEM_ABSTRACT_SELECTOR) {
        return createFakeNode(abstract);
      }
      if (selector === ITEM_SUBJECT_LINK_SELECTOR) {
        return createFakeNode('', { href: url });
      }
      if (selector === ITEM_COVER_SELECTOR) {
        return createFakeNode('', { src: cover });
      }
      return null;
    },
  };
}

/**
 * Runs a script string (as handed to `page.evaluate`) inside a fresh `vm`
 * context that exposes a fake `document`, `window.__DATA__`, `location`, and a
 * `setTimeout` that invokes its callback immediately.
 *
 * @param {string} script - Source text to evaluate.
 * @param {Array<Object>} rawItems - Exposed as `window.__DATA__.items`.
 * @param {Array<Object>} domItems - Fake item elements returned by the
 *   document-level queries.
 * @returns {Promise<*>} Whatever the script evaluates to.
 */
async function runSearchEvaluate(script, rawItems, domItems) {
  const document = {
    querySelector(selector) {
      if (DOCUMENT_FIRST_TITLE_SELECTORS.has(selector)) {
        return domItems[0]?.querySelector(ITEM_TITLE_SELECTOR) || null;
      }
      return null;
    },
    querySelectorAll(selector) {
      if (DOCUMENT_ITEM_LIST_SELECTORS.has(selector)) {
        return domItems;
      }
      return [];
    },
  };

  return vm.runInNewContext(script, {
    Map,
    Promise,
    document,
    window: { __DATA__: { items: rawItems } },
    location: {
      href: 'https://search.douban.com/movie/subject_search?search_text=%E5%B0%84%E9%9B%95%E8%8B%B1%E9%9B%84%E4%BC%A0',
      origin: 'https://search.douban.com',
    },
    // Fire timers synchronously so the evaluated script never actually waits.
    setTimeout(fn) {
      fn();
      return 0;
    },
  });
}

describe('douban utils', () => {
  it('normalizes valid subject ids', () => {
    expect(normalizeDoubanSubjectId(' 30382501 ')).toBe('30382501');
  });

  it('rejects invalid subject ids', () => {
    expect(() => normalizeDoubanSubjectId('tt30382501')).toThrow('Invalid Douban subject ID');
  });

  it('promotes thumbnail urls to large photo urls', () => {
    expect(promoteDoubanPhotoUrl('https://img1.doubanio.com/view/photo/m/public/p2913450214.webp')).toBe('https://img1.doubanio.com/view/photo/l/public/p2913450214.webp');
    expect(promoteDoubanPhotoUrl('https://img9.doubanio.com/view/photo/s_ratio_poster/public/p2578474613.jpg')).toBe('https://img9.doubanio.com/view/photo/l/public/p2578474613.jpg');
  });

  it('rejects non-http photo urls during promotion', () => {
    expect(promoteDoubanPhotoUrl('data:image/gif;base64,abc')).toBe('');
  });

  it('prefers lazy-loaded photo urls over data placeholders', () => {
    expect(resolveDoubanPhotoAssetUrl([
      '',
      'https://img1.doubanio.com/view/photo/m/public/p2913450214.webp',
      'data:image/gif;base64,abc',
    ], 'https://movie.douban.com/subject/30382501/photos?type=Rb')).toBe('https://img1.doubanio.com/view/photo/m/public/p2913450214.webp');
  });

  it('drops unsupported non-http photo urls when no real image url exists', () => {
    expect(resolveDoubanPhotoAssetUrl(['data:image/gif;base64,abc', 'blob:https://movie.douban.com/example'], 'https://movie.douban.com/subject/30382501/photos?type=Rb')).toBe('');
  });

  it('removes the default photo cap when scanning for an exact photo id', async () => {
    // Two evaluate calls are mocked: the first resolves to a
    // { blocked, title, href } object, the second to the scan payload.
    const evaluate = vi.fn()
      .mockResolvedValueOnce({ blocked: false, title: 'Some Movie', href: 'https://movie.douban.com/subject/30382501/photos?type=Rb' })
      .mockResolvedValueOnce({
        subjectId: '30382501',
        subjectTitle: 'The Wandering Earth 2',
        type: 'Rb',
        photos: [
          {
            index: 731,
            photoId: '2913450215',
            title: 'Character poster',
            imageUrl: 'https://img1.doubanio.com/view/photo/l/public/p2913450215.jpg',
            thumbUrl: 'https://img1.doubanio.com/view/photo/m/public/p2913450215.jpg',
            detailUrl: 'https://movie.douban.com/photos/photo/2913450215/',
            page: 25,
          },
        ],
      });
    const page = {
      goto: vi.fn().mockResolvedValue(undefined),
      wait: vi.fn().mockResolvedValue(undefined),
      evaluate,
    };
    await loadDoubanSubjectPhotos(page, '30382501', {
      type: 'Rb',
      targetPhotoId: '2913450215',
    });
    // The script text passed to the second evaluate call is what gets inspected.
    const scanScript = evaluate.mock.calls[1]?.[0];
    expect(scanScript).toContain('const targetPhotoId = "2913450215";');
    expect(scanScript).toContain(`const limit = ${Number.MAX_SAFE_INTEGER};`);
    expect(scanScript).toContain('for (let pageIndex = 0; photos.length < limit; pageIndex += 1)');
  });

  it('keeps image extensions when download urls contain query params', () => {
    expect(getDoubanPhotoExtension('https://img1.doubanio.com/view/photo/l/public/p2913450214.webp?foo=1')).toBe('.webp');
    expect(getDoubanPhotoExtension('https://img1.doubanio.com/view/photo/l/public/p2913450214.jpeg')).toBe('.jpeg');
  });

  it('maps tv series results to tvshow in searchDouban output', async () => {
    const domItems = [
      createFakeSearchItem({
        title: '射雕英雄传 (2017)',
        url: 'https://movie.douban.com/subject/26663086/',
        rating: '7.9',
        abstract: '中国大陆 / 剧情 / 武侠 / 古装 / 45分钟',
        cover: 'https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2411844029.webp',
      }),
      createFakeSearchItem({
        title: '射雕英雄传:侠之大者 (2025)',
        url: 'https://movie.douban.com/subject/36289423/',
        rating: '5.2',
        abstract: '中国大陆 / 武侠 / 146分钟',
        cover: 'https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2917502509.webp',
      }),
    ];
    const rawItems = [
      {
        id: 26663086,
        labels: [{ text: '剧集' }, { text: '可播放' }],
        more_url: "onclick=\"moreurl(this,{from:'mv_subject_search',subject_id:'26663086',is_tv:'1'})\"",
      },
      {
        id: 36289423,
        labels: [{ text: '可播放' }],
        more_url: "onclick=\"moreurl(this,{from:'mv_subject_search',subject_id:'36289423',is_tv:'0'})\"",
      },
    ];
    const page = {
      goto: vi.fn().mockResolvedValue(undefined),
      wait: vi.fn().mockResolvedValue(undefined),
      evaluate: vi.fn()
        .mockResolvedValueOnce({ blocked: false, title: '射雕英雄传 - 电影 - 豆瓣搜索', href: 'https://search.douban.com/movie/subject_search?search_text=%E5%B0%84%E9%9B%95%E8%8B%B1%E9%9B%84%E4%BC%A0' })
        // Execute the real extraction script against the fake DOM instead of
        // returning a canned result.
        .mockImplementationOnce((script) => runSearchEvaluate(script, rawItems, domItems)),
    };
    await expect(searchDouban(page, 'movie', '射雕英雄传', 20)).resolves.toMatchObject([
      { id: '26663086', type: 'tvshow', title: '射雕英雄传 (2017)' },
      { id: '36289423', type: 'movie', title: '射雕英雄传:侠之大者 (2025)' },
    ]);
  });

  it('normalizes douban book subject raw data into structured fields', () => {
    const normalized = normalizeDoubanBookSubject({
      id: '2567698',
      title: '小狗钱钱',
      subtitle: '让孩子和家长共同成长的财商童话',
      originalTitle: 'Ein Hund namens Money',
      infoText: `
        作者: [德] 博多·舍费尔
        出版社: 南海出版公司
        副标题: 让孩子和家长共同成长的财商童话
        原作名: Ein Hund namens Money
        译者: 王钟欣 / 余茜
        出版年: 2014-1-1
        页数: 208
        定价: 26.00元
        装帧: 平装
        丛书: 新经典文库·爱心树童书
        ISBN: 9787544270871
      `,
      rating: '8.9',
      ratingCount: '12345',
      summary: '理财启蒙故事',
      cover: 'https://img9.doubanio.com/view/subject/l/public/s29618581.jpg',
      url: 'https://book.douban.com/subject/2567698/',
    });
    expect(normalized).toMatchObject({
      id: '2567698',
      type: 'book',
      title: '小狗钱钱',
      subtitle: '让孩子和家长共同成长的财商童话',
      originalTitle: 'Ein Hund namens Money',
      authors: ['[德] 博多·舍费尔'],
      translators: ['王钟欣', '余茜'],
      publisher: '南海出版公司',
      publishDate: '2014-1-1',
      publishYear: '2014',
      pageCount: 208,
      binding: '平装',
      price: '26.00元',
      series: '新经典文库·爱心树童书',
      isbn13: '9787544270871',
      rating: 8.9,
      ratingCount: 12345,
      summary: '理财启蒙故事',
      cover: 'https://img9.doubanio.com/view/subject/l/public/s29618581.jpg',
      url: 'https://book.douban.com/subject/2567698/',
    });
  });

  it('loads book subject details from book.douban.com when type=book', async () => {
    const page = {
      goto: vi.fn().mockResolvedValue(undefined),
      wait: vi.fn().mockResolvedValue(undefined),
      evaluate: vi.fn()
        .mockResolvedValueOnce({ blocked: false, title: '小狗钱钱 (豆瓣)', href: 'https://book.douban.com/subject/2567698/' })
        .mockResolvedValueOnce({
          id: '2567698',
          title: '小狗钱钱',
          subtitle: '',
          originalTitle: '',
          infoText: `
            作者: [德] 博多·舍费尔
            出版社: 南海出版公司
            出版年: 2014-1-1
            ISBN: 9787544270871
          `,
          rating: '8.9',
          ratingCount: '12345',
          summary: '理财启蒙故事',
          cover: 'https://img9.doubanio.com/view/subject/l/public/s29618581.jpg',
          url: 'https://book.douban.com/subject/2567698/',
        }),
    };
    const detail = await loadDoubanSubjectDetail(page, '2567698', 'book');
    expect(page.goto).toHaveBeenCalledWith('https://book.douban.com/subject/2567698/', {
      waitUntil: 'load',
      settleMs: 1500,
    });
    expect(page.wait).toHaveBeenCalledWith({ selector: 'h1 span, #info', timeout: 8 });
    expect(detail).toMatchObject({
      id: '2567698',
      type: 'book',
      title: '小狗钱钱',
      authors: ['[德] 博多·舍费尔'],
      publisher: '南海出版公司',
      isbn13: '9787544270871',
      rating: 8.9,
      ratingCount: 12345,
      url: 'https://book.douban.com/subject/2567698/',
    });
  });

  it('retries transient detached navigation errors when loading douban search results', async () => {
    const page = {
      // First navigation attempt rejects; the second succeeds.
      goto: vi.fn()
        .mockRejectedValueOnce(new Error('Detached while handling command'))
        .mockResolvedValueOnce(undefined),
      wait: vi.fn().mockResolvedValue(undefined),
      evaluate: vi.fn()
        .mockResolvedValueOnce({
          blocked: false,
          title: '经济学思维 - 豆瓣搜索',
          href: 'https://search.douban.com/book/subject_search?search_text=%E7%BB%8F%E6%B5%8E%E5%AD%A6%E6%80%9D%E7%BB%B4&cat=1001',
        })
        .mockResolvedValueOnce([
          {
            rank: 1,
            id: '26895402',
            type: 'book',
            title: '经济学思维',
            rating: 7.9,
            abstract: '李子畅 / 中信出版社 / 2016-7',
            url: 'https://book.douban.com/subject/26895402/',
            cover: 'https://img1.doubanio.com/view/subject/m/public/s29000000.jpg',
          },
        ]),
    };
    const results = await searchDouban(page, 'book', '经济学思维', 3);
    expect(page.goto).toHaveBeenNthCalledWith(1, 'https://search.douban.com/book/subject_search?search_text=%E7%BB%8F%E6%B5%8E%E5%AD%A6%E6%80%9D%E7%BB%B4&cat=1001', {
      waitUntil: 'load',
      settleMs: 1500,
    });
    expect(page.goto).toHaveBeenCalledTimes(2);
    expect(page.wait).toHaveBeenCalledWith({
      selector: '.item-root .title-text, .item-root .title a, .result-list .result-item h3 a',
      timeout: 8,
    });
    expect(results).toEqual([
      {
        rank: 1,
        id: '26895402',
        type: 'book',
        title: '经济学思维',
        rating: 7.9,
        abstract: '李子畅 / 中信出版社 / 2016-7',
        url: 'https://book.douban.com/subject/26895402/',
        cover: 'https://img1.doubanio.com/view/subject/m/public/s29000000.jpg',
      },
    ]);
  });
});

describe('inferDoubanSearchResultType', () => {
  it('returns tvshow for movie search results marked as TV', () => {
    expect(inferDoubanSearchResultType('movie', {
      moreUrl: "onclick=\"moreurl(this,{is_tv:'1'})\"",
      labels: [{ text: '剧集' }],
    })).toBe('tvshow');
  });

  it('returns movie when a movie search result has no TV signal', () => {
    expect(inferDoubanSearchResultType('movie', {
      moreUrl: "onclick=\"moreurl(this,{is_tv:'0'})\"",
      labels: [{ text: '可播放' }],
    })).toBe('movie');
  });

  it('preserves non-movie search types', () => {
    expect(inferDoubanSearchResultType('book', {
      moreUrl: '',
      labels: [{ text: '图书' }],
    })).toBe('book');
  });
});