/ clis / cnki / search.js
search.js
 1  import { cli, Strategy } from '@jackwener/opencli/registry';
 2  import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';
 3  cli({
 4      site: 'cnki',
 5      name: 'search',
 6      description: '中国知网论文搜索(海外版)',
 7      domain: 'oversea.cnki.net',
 8      strategy: Strategy.COOKIE,
 9      args: [
10          { name: 'query', positional: true, required: true, help: '搜索关键词' },
11          { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
12      ],
13      columns: ['rank', 'title', 'authors', 'journal', 'date', 'url'],
14      navigateBefore: false,
15      func: async (page, kwargs) => {
16          const limit = clampInt(kwargs.limit, 10, 1, 20);
17          const query = requireNonEmptyQuery(kwargs.query);
18          await page.goto(`https://oversea.cnki.net/kns/search?dbcode=CFLS&kw=${encodeURIComponent(query)}&korder=SU`);
19          await page.wait(8);
20          const data = await page.evaluate(`
21        (async () => {
22          const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
23          for (let i = 0; i < 40; i++) {
24            if (document.querySelector('.result-table-list tbody tr, #gridTable tbody tr')) break;
25            await new Promise(r => setTimeout(r, 500));
26          }
27          const rows = document.querySelectorAll('.result-table-list tbody tr, #gridTable tbody tr');
28          const results = [];
29          for (const row of rows) {
30            const tds = row.querySelectorAll('td');
31            if (tds.length < 5) continue;
32  
33            const nameCell = row.querySelector('td.name') || tds[2];
34            const titleEl = nameCell?.querySelector('a');
35            const title = normalize(titleEl?.textContent).replace(/免费$/, '');
36            if (!title) continue;
37  
38            let url = titleEl?.getAttribute('href') || '';
39            if (url && !url.startsWith('http')) url = 'https://oversea.cnki.net' + url;
40  
41            const authorCell = row.querySelector('td.author') || tds[3];
42            const journalCell = row.querySelector('td.source') || tds[4];
43            const dateCell = row.querySelector('td.date') || tds[5];
44  
45            results.push({
46              rank: results.length + 1,
47              title,
48              authors: normalize(authorCell?.textContent),
49              journal: normalize(journalCell?.textContent),
50              date: normalize(dateCell?.textContent),
51              url,
52            });
53            if (results.length >= ${limit}) break;
54          }
55          return results;
56        })()
57      `);
58          return Array.isArray(data) ? data : [];
59      },
60  });