content.js
import { cli, Strategy } from '@jackwener/opencli/registry';

/**
 * Registers the `content` command for the `xiaoe` site.
 *
 * Pipeline: navigate to the URL given as the positional `url` argument, wait,
 * then run an in-page script that extracts the article text and image URLs.
 *
 * The in-page script returns a single row with:
 *   - title:          document.title
 *   - content:        extracted plain text
 *   - content_length: content.length (UTF-16 code units)
 *   - image_count:    number of matching image URLs found on the page
 *   - images:         JSON string holding at most the first 20 matching URLs
 *
 * Only `title`, `content_length` and `image_count` are listed in `columns`;
 * `content` and `images` are still part of the returned row.
 *
 * NOTE(review): `wait: 6` is presumably a delay in seconds that lets the
 * client-rendered page finish loading — confirm against the opencli pipeline docs.
 */
cli({
  site: 'xiaoe',
  name: 'content',
  description: '提取小鹅通图文页面内容为文本',
  domain: 'h5.xet.citv.cn',
  strategy: Strategy.COOKIE,
  args: [
    { name: 'url', required: true, positional: true, help: '页面 URL' },
  ],
  columns: ['title', 'content_length', 'image_count'],
  pipeline: [
    { navigate: '${{ args.url }}' },
    { wait: 6 },
    // The script below is shipped as a string and evaluated inside the page.
    // It uses block comments only, so it stays valid even if newlines are
    // collapsed in transit, and it must not contain backticks or "${".
    { evaluate: `(() => {
  /* Trimmed innerText of a node; '' for null or for nodes without innerText
     (e.g. SVG elements matched by the attribute-substring selectors). */
  const textOf = (node) =>
    (node && typeof node.innerText === 'string' ? node.innerText.trim() : '');

  /* Candidate containers for the article body, most specific first. */
  const selectors = ['.rich-text-wrap', '.content-wrap', '.article-content', '.text-content',
    '.course-detail', '.detail-content', '[class*="richtext"]', '[class*="rich-text"]', '.ql-editor'];

  /* Take the first container holding more than 50 characters of text.
     innerText forces layout, so it is read once per element. */
  let content = '';
  for (const selector of selectors) {
    const text = textOf(document.querySelector(selector));
    if (text.length > 50) { content = text; break; }
  }

  /* Fallback: the first generic root that actually has text. */
  if (!content) {
    const roots = [document.querySelector('main'), document.querySelector('#app'), document.body];
    for (const root of roots) {
      content = textOf(root);
      if (content) break;
    }
  }

  /* Keep only non-inline images whose URL contains 'xiaoe'. */
  const images = Array.from(document.querySelectorAll('img'), (img) => img.src)
    .filter((src) => src && !src.startsWith('data:') && src.includes('xiaoe'));

  return [{
    title: document.title,
    content: content,
    content_length: content.length,
    image_count: images.length,
    images: JSON.stringify(images.slice(0, 20)),
  }];
})()
` },
  ],
});