/ clis / gov-policy / search.js
search.js
 1  import { cli, Strategy } from '@jackwener/opencli/registry';
 2  import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';
 3  
 4  cli({
 5      site: 'gov-policy',
 6      name: 'search',
 7      description: '中国政府网政策文件搜索',
 8      domain: 'sousuo.www.gov.cn',
 9      strategy: Strategy.PUBLIC,
10      browser: true,
11      args: [
12          { name: 'query', positional: true, required: true, help: '搜索关键词' },
13          { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
14      ],
15      columns: ['rank', 'title', 'description', 'date', 'url'],
16      navigateBefore: false,
17      func: async (page, kwargs) => {
18          const limit = clampInt(kwargs.limit, 10, 1, 20);
19          const query = requireNonEmptyQuery(kwargs.query);
20          await page.goto(`https://sousuo.www.gov.cn/sousuo/search.shtml?code=17da70961a7&dataTypeId=107&searchWord=${encodeURIComponent(query)}`);
21          await page.wait(5);
22          const data = await page.evaluate(`
23        (async () => {
24          const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
25          for (let i = 0; i < 30; i++) {
26            if (document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item').length > 0) break;
27            await new Promise(r => setTimeout(r, 500));
28          }
29          const results = [];
30          for (const el of document.querySelectorAll('.basic_result_content .item, .js_basic_result_content .item')) {
31            const titleEl = el.querySelector('a.title, .title a, a.log-anchor');
32            const title = normalize(titleEl?.textContent).replace(/<[^>]+>/g, '');
33            if (!title || title.length < 4) continue;
34  
35            let url = titleEl?.getAttribute('href') || '';
36            if (url && !url.startsWith('http')) url = 'https://www.gov.cn' + url;
37  
38            const description = normalize(el.querySelector('.description')?.textContent).slice(0, 120);
39            const date = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/)?.[1] || '';
40            results.push({ rank: results.length + 1, title, description, date, url });
41            if (results.length >= ${limit}) break;
42          }
43          return results;
44        })()
45      `);
46          return Array.isArray(data) ? data : [];
47      },
48  });