// clis/douban/top250.js
 1  import { cli, Strategy } from '@jackwener/opencli/registry';
 2  cli({
 3      site: 'douban',
 4      name: 'top250',
 5      description: '豆瓣电影 Top250',
 6      domain: 'movie.douban.com',
 7      strategy: Strategy.COOKIE,
 8      browser: true,
 9      args: [
10          { name: 'limit', type: 'int', default: 250, help: '返回结果数量' },
11      ],
12      columns: ['rank', 'id', 'title', 'rating', 'url'],
13      pipeline: [
14          { navigate: 'https://movie.douban.com/top250' },
15          { evaluate: `async () => {
16    const results = [];
17    const limit = \${{ args.limit }};
18  
19    const parsePage = (doc) => {
20      const items = doc.querySelectorAll('.item');
21      for (const item of items) {
22        if (results.length >= limit) break;
23  
24        const rankEl = item.querySelector('.pic em');
25        const linkEl = item.querySelector('a');
26        const titleEl = item.querySelector('.title');
27        const ratingEl = item.querySelector('.rating_num');
28  
29        const href = linkEl?.href || '';
30        const matchResult = href.match(/subject\\/(\\d+)/);
31        const id = matchResult ? matchResult[1] : '';
32  
33        const title = titleEl?.textContent?.trim() || '';
34        const rank = parseInt(rankEl?.textContent || '0', 10);
35        const rating = ratingEl?.textContent?.trim() || '';
36  
37        if (id && title) {
38          results.push({
39            rank: rank || results.length + 1,
40            id,
41            title,
42            rating: rating ? parseFloat(rating) : 0,
43            url: href
44          });
45        }
46      }
47    };
48  
49    parsePage(document);
50  
51    for (let start = 25; start < 250 && results.length < limit; start += 25) {
52      const resp = await fetch(\`https://movie.douban.com/top250?start=\${start}\`);
53      if (!resp.ok) break;
54      const html = await resp.text();
55      if (!html) break;
56  
57      const doc = new DOMParser().parseFromString(html, 'text/html');
58      parsePage(doc);
59      await new Promise(r => setTimeout(r, 150));
60    }
61  
62    return results;
63  }
64  ` },
65          { limit: '${{ args.limit }}' },
66      ],
67  });