search.js
import { cli, Strategy } from '@jackwener/opencli/registry';
import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';

/**
 * Registers the `google-scholar search` command.
 *
 * The command opens a Google Scholar results page for the given query and
 * scrapes one row per result with the columns
 * rank / title / authors / source / year / cited / url.
 *
 * Args:
 *   query (positional, required) - search keywords.
 *   limit (int, default 10)      - number of rows to return, clamped to 1..20.
 *
 * Returns (from `func`): an array of row objects, or an empty array when the
 * in-page script does not yield an array.
 */
cli({
  site: 'google-scholar',
  name: 'search',
  description: 'Google Scholar 学术搜索',
  domain: 'scholar.google.com',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'authors', 'source', 'year', 'cited', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    const limit = clampInt(kwargs.limit, 10, 1, 20);
    const query = requireNonEmptyQuery(kwargs.query);

    // Ask Scholar for `limit` results per page; without `num` the page only
    // carries the default 10 results, so a limit above 10 could never be met.
    const searchUrl = new URL('https://scholar.google.com/scholar');
    searchUrl.searchParams.set('q', query);
    searchUrl.searchParams.set('hl', 'zh-CN');
    searchUrl.searchParams.set('num', String(limit));

    await page.goto(searchUrl.toString());
    // NOTE(review): presumably a 3-second settle delay — confirm the unit of page.wait.
    await page.wait(3);

    // Script evaluated inside the page. Backslashes are doubled because this is
    // a template literal: `\\s` reaches the page as `\s`.
    //
    // The selector matches both the outer result wrapper and the `.gs_ri` node
    // nested inside it, and both resolve to the same container. `seen` makes
    // sure each container is processed once, so results are not duplicated and
    // the limit counts unique results.
    const extractScript = `
      (() => {
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        const results = [];
        const seen = new Set();
        for (const el of document.querySelectorAll('.gs_r.gs_or.gs_scl, .gs_ri')) {
          const container = el.querySelector('.gs_ri') || el;
          if (seen.has(container)) continue;
          seen.add(container);

          const titleEl = container.querySelector('.gs_rt a, h3 a');
          const title = normalize(titleEl?.textContent);
          if (!title) continue;

          const url = titleEl?.getAttribute('href') || '';
          const infoLine = normalize(container.querySelector('.gs_a')?.textContent);
          const parts = infoLine.split(' - ');
          const authors = (parts[0] || '').trim();
          const sourceParts = (parts[1] || '').split(',');
          const source = sourceParts.slice(0, -1).join(',').trim() || sourceParts[0]?.trim() || '';
          const year = infoLine.match(/(19|20)\\d{2}/)?.[0] || '';
          const citedText = normalize(container.querySelector('.gs_fl a[href*="cites"]')?.textContent);
          const cited = citedText.match(/(\\d+)/)?.[1] || '0';

          results.push({
            rank: results.length + 1,
            title,
            authors: authors.slice(0, 80),
            source: source.slice(0, 60),
            year,
            cited,
            url,
          });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `;

    const data = await page.evaluate(extractScript);
    return Array.isArray(data) ? data : [];
  },
});