// search.js
import { cli, Strategy } from '@jackwener/opencli/registry';
import { clampInt, requireNonEmptyQuery } from '../_shared/common.js';

/**
 * Registers the `gov-policy search` command.
 *
 * The command opens the sousuo.www.gov.cn search page for the given keyword,
 * waits for the result list to render, and scrapes each result item into a
 * row of `{ rank, title, description, date, url }`.
 *
 * Arguments (see `args` below):
 *   - query (positional, required): the search keyword.
 *   - limit (int, default 10): number of rows to return; passed through
 *     `clampInt(kwargs.limit, 10, 1, 20)` — presumably clamped to 1..20 with a
 *     fallback of 10 (helper lives in ../_shared/common.js; confirm there).
 *
 * Returns an array of result rows, or an empty array when the in-page script
 * does not yield an array.
 */
cli({
  site: 'gov-policy',
  name: 'search',
  description: '中国政府网政策文件搜索',
  domain: 'sousuo.www.gov.cn',
  strategy: Strategy.PUBLIC,
  browser: true,
  args: [
    { name: 'query', positional: true, required: true, help: '搜索关键词' },
    { name: 'limit', type: 'int', default: 10, help: '返回结果数量 (max 20)' },
  ],
  columns: ['rank', 'title', 'description', 'date', 'url'],
  navigateBefore: false,
  func: async (page, kwargs) => {
    const limit = clampInt(kwargs.limit, 10, 1, 20);
    const query = requireNonEmptyQuery(kwargs.query);

    await page.goto(`https://sousuo.www.gov.cn/sousuo/search.shtml?code=17da70961a7&dataTypeId=107&searchWord=${encodeURIComponent(query)}`);
    // Fixed initial wait before polling (unit is defined by the page API —
    // presumably seconds; confirm against the opencli page implementation).
    await page.wait(5);

    // The script below runs inside the page. Backslashes are doubled because
    // it is shipped as a string: `\\s` / `\\d` reach the browser as `\s` / `\d`.
    //
    // Steps performed in-page:
    //   1. Poll up to 30 times at 500 ms intervals until a result item exists.
    //   2. For every item, read the title link, description and first
    //      date-like token (YYYY-MM-DD with `-`, `.` or `/` separators).
    //   3. Skip items whose title is empty or shorter than 4 characters.
    //   4. Resolve the link against https://www.gov.cn/ with the URL parser so
    //      that root-relative (`/x`), path-relative (`x`) and protocol-relative
    //      (`//host/x`) hrefs all become valid absolute URLs. Plain string
    //      concatenation would mangle the latter two forms.
    //   5. Stop once `limit` rows are collected.
    const data = await page.evaluate(`
      (async () => {
        const ITEM_SELECTOR = '.basic_result_content .item, .js_basic_result_content .item';
        const BASE_URL = 'https://www.gov.cn/';
        const normalize = v => (v || '').replace(/\\s+/g, ' ').trim();
        const toAbsoluteUrl = href => {
          if (!href) return '';
          try {
            return new URL(href, BASE_URL).href;
          } catch {
            return href;
          }
        };

        for (let i = 0; i < 30; i++) {
          if (document.querySelectorAll(ITEM_SELECTOR).length > 0) break;
          await new Promise(r => setTimeout(r, 500));
        }

        const results = [];
        for (const el of document.querySelectorAll(ITEM_SELECTOR)) {
          const titleEl = el.querySelector('a.title, .title a, a.log-anchor');
          const title = normalize(titleEl?.textContent).replace(/<[^>]+>/g, '');
          if (!title || title.length < 4) continue;

          const url = toAbsoluteUrl((titleEl?.getAttribute('href') || '').trim());
          const description = normalize(el.querySelector('.description')?.textContent).slice(0, 120);
          const date = (el.textContent || '').match(/(\\d{4}[-./]\\d{1,2}[-./]\\d{1,2})/)?.[1] || '';

          results.push({ rank: results.length + 1, title, description, date, url });
          if (results.length >= ${limit}) break;
        }
        return results;
      })()
    `);

    // Guard against a non-array result from the in-page script.
    return Array.isArray(data) ? data : [];
  },
});