/ clis / google-scholar / search.js
search.js
 1  import { cli, Strategy } from '@jackwener/opencli/registry';
 2  import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';
 3  
 4  cli({
 5      site: 'google-scholar',
 6      name: 'search',
 7      description: 'Google Scholar 学术搜索',
 8      domain: 'scholar.google.com',
 9      strategy: Strategy.PUBLIC,
10      browser: true,
11      args: [
12          { name: 'query', positional: true, required: true, help: '搜索关键词' },
13          { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
14      ],
15      columns: ['rank', 'title', 'authors', 'source', 'year', 'cited', 'url'],
16      navigateBefore: false,
17      func: async (page, kwargs) => {
18          const limit = clampInt(kwargs.limit, 10, 1, 20);
19          const query = requireNonEmptyQuery(kwargs.query);
20          await page.goto(`https://scholar.google.com/scholar?q=${encodeURIComponent(query)}&hl=zh-CN`);
21          await page.wait(3);
22          const data = await page.evaluate(`
23        (() => {
24          const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
25          const results = [];
26          for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_ri')) {
27            const container = el.querySelector('.gs_ri') || el;
28            const titleEl = container.querySelector('.gs_rt a, h3 a');
29            const title = normalize(titleEl?.textContent);
30            if (!title) continue;
31  
32            const url = titleEl?.getAttribute('href') || '';
33            const infoLine = normalize(container.querySelector('.gs_a')?.textContent);
34            const parts = infoLine.split(' - ');
35            const authors = (parts[0] || '').trim();
36            const sourceParts = (parts[1] || '').split(',');
37            const source = sourceParts.slice(0, -1).join(',').trim() || sourceParts[0]?.trim() || '';
38            const year = infoLine.match(/(19|20)\\d{2}/)?.[0] || '';
39            const citedText = normalize(container.querySelector('.gs_fl a[href*="cites"]')?.textContent);
40            const cited = citedText.match(/(\\d+)/)?.[1] || '0';
41  
42            results.push({
43              rank: results.length + 1,
44              title,
45              authors: authors.slice(0, 80),
46              source: source.slice(0, 60),
47              year,
48              cited,
49              url,
50            });
51            if (results.length >= ${limit}) break;
52          }
53          return results;
54        })()
55      `);
56          return Array.isArray(data) ? data : [];
57      },
58  });