Cradicle Explorer

/ clis / 36kr / news.test.js
news.test.js
 1  import { describe, it, expect, vi, afterEach } from 'vitest';
/**
 * RSS 2.0 fixture shared by the tests in this file: a single channel holding
 * three <item> entries, each with a <title>, a CDATA-wrapped <link> and a
 * <pubDate> whose first ten characters are a YYYY-MM-DD date.
 */
const SAMPLE_RSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>36氪</title>
<item>
  <title>红杉中国领投AI公司「示例」，金额近2亿元</title>
  <link><![CDATA[https://36kr.com/p/1111111111111111?f=rss]]></link>
  <pubDate>2026-03-26 10:00:00  +0800</pubDate>
</item>
<item>
  <title>马斯克旗下xAI估值突破1000亿美元</title>
  <link><![CDATA[https://36kr.com/p/2222222222222222?f=rss]]></link>
  <pubDate>2026-03-26 09:00:00  +0800</pubDate>
</item>
<item>
  <title>OpenAI发布GPT-5，多模态能力大幅提升</title>
  <link><![CDATA[https://36kr.com/p/3333333333333333?f=rss]]></link>
  <pubDate>2026-03-25 20:00:00  +0800</pubDate>
</item>
</channel></rss>`;
// After every test, call vi.restoreAllMocks() so that a spy installed by one
// test is not carried over into the next.
afterEach(function restoreSpies() {
    vi.restoreAllMocks();
});
describe('36kr/news RSS parsing', () => {
    /**
     * Extracts ranked news entries from an RSS document using regexes.
     *
     * NOTE(review): the original inline comment says this mirrors the regex
     * logic in news.ts rather than importing it — keep the two in sync, or
     * export the parser from news.ts and exercise that directly.
     *
     * @param {string} xml - Raw RSS markup.
     * @param {number} [limit=Infinity] - Maximum number of entries to return.
     * @returns {{rank: number, title: string, date: string, url: string}[]}
     *   Entries in document order. Items whose title is empty are skipped and
     *   do not consume a rank.
     */
    const parseRssItems = (xml, limit = Infinity) => {
        const items = [];
        for (const [, block] of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
            if (items.length >= limit) {
                break;
            }
            const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
            if (!title) {
                continue;
            }
            // Prefer the CDATA-wrapped link; fall back to a plain-text <link>.
            const url = block.match(/<link><!\[CDATA\[(.*?)\]\]>/)?.[1] ??
                block.match(/<link>(.*?)<\/link>/)?.[1] ??
                '';
            const pubDate = block.match(/<pubDate>(.*?)<\/pubDate>/)?.[1]?.trim() ?? '';
            items.push({
                rank: items.length + 1,
                title,
                // Only the leading 10 characters (the date part) are kept.
                date: pubDate.slice(0, 10),
                url: url.trim(),
            });
        }
        return items;
    };

    // The parser is run directly on fixture XML, so no fetch mocking is needed.
    it('parses RSS feed into ranked news items', () => {
        const items = parseRssItems(SAMPLE_RSS, 10);
        expect(items).toHaveLength(3);
        expect(items.map(({ rank }) => rank)).toEqual([1, 2, 3]);
        expect(items[0].rank).toBe(1);
        expect(items[0].title).toBe('红杉中国领投AI公司「示例」，金额近2亿元');
        expect(items[0].date).toBe('2026-03-26');
        expect(items[0].url).toBe('https://36kr.com/p/1111111111111111?f=rss');
    });
    it('respects limit — returns at most N items', () => {
        const items = parseRssItems(SAMPLE_RSS, 2);
        expect(items).toHaveLength(2);
        expect(items[1].rank).toBe(2);
        expect(items[1].url).toBe('https://36kr.com/p/2222222222222222?f=rss');
    });
    it('skips items with empty title', () => {
        const xml = `<rss><channel>
      <item><title></title><link>https://36kr.com/p/0</link><pubDate>2026-01-01</pubDate></item>
      <item><title>有标题的文章</title><link>https://36kr.com/p/1</link><pubDate>2026-01-01</pubDate></item>
    </channel></rss>`;
        const items = parseRssItems(xml);
        expect(items).toHaveLength(1);
        expect(items[0].title).toBe('有标题的文章');
        // The skipped item must not consume a rank, and the plain-text
        // <link> fallback must be picked up when no CDATA wrapper is present.
        expect(items[0].rank).toBe(1);
        expect(items[0].url).toBe('https://36kr.com/p/1');
        expect(items[0].date).toBe('2026-01-01');
    });
});