// clis/xiaoe/content.js
 1  import { cli, Strategy } from '@jackwener/opencli/registry';
 2  cli({
 3      site: 'xiaoe',
 4      name: 'content',
 5      description: '提取小鹅通图文页面内容为文本',
 6      domain: 'h5.xet.citv.cn',
 7      strategy: Strategy.COOKIE,
 8      args: [
 9          { name: 'url', required: true, positional: true, help: '页面 URL' },
10      ],
11      columns: ['title', 'content_length', 'image_count'],
12      pipeline: [
13          { navigate: '${{ args.url }}' },
14          { wait: 6 },
15          { evaluate: `(() => {
16    var selectors = ['.rich-text-wrap','.content-wrap','.article-content','.text-content',
17      '.course-detail','.detail-content','[class*="richtext"]','[class*="rich-text"]','.ql-editor'];
18    var content = '';
19    for (var i = 0; i < selectors.length; i++) {
20      var el = document.querySelector(selectors[i]);
21      if (el && el.innerText.trim().length > 50) { content = el.innerText.trim(); break; }
22    }
23    if (!content) content = (document.querySelector('main') || document.querySelector('#app') || document.body).innerText.trim();
24  
25    var images = [];
26    document.querySelectorAll('img').forEach(function(img) {
27      if (img.src && !img.src.startsWith('data:') && img.src.includes('xiaoe')) images.push(img.src);
28    });
29    return [{
30      title: document.title,
31      content: content,
32      content_length: content.length,
33      image_count: images.length,
34      images: JSON.stringify(images.slice(0, 20)),
35    }];
36  })()
37  ` },
38      ],
39  });