Cradicle Explorer

/ clis / google / utils.test.js
utils.test.js
 1  import { describe, it, expect } from 'vitest';
 2  import { parseRssItems } from './utils.js';
 3  describe('parseRssItems', () => {
 4      it('extracts plain text fields', () => {
 5          const xml = `
 6        <channel>
 7          <item><title>Hello</title><link>https://example.com</link></item>
 8          <item><title>World</title><link>https://test.com</link></item>
 9        </channel>
10      `;
11          const items = parseRssItems(xml, ['title', 'link']);
12          expect(items).toEqual([
13              { title: 'Hello', link: 'https://example.com' },
14              { title: 'World', link: 'https://test.com' },
15          ]);
16      });
17      it('handles CDATA-wrapped content', () => {
18          const xml = `
19        <item><title><![CDATA[Breaking News]]></title><link>https://news.com</link></item>
20      `;
21          const items = parseRssItems(xml, ['title', 'link']);
22          expect(items).toEqual([
23              { title: 'Breaking News', link: 'https://news.com' },
24          ]);
25      });
26      it('handles namespaced fields like ht:approx_traffic', () => {
27          const xml = `
28        <item>
29          <title>AI</title>
30          <ht:approx_traffic>500,000+</ht:approx_traffic>
31          <pubDate>Mon, 20 Mar 2026</pubDate>
32        </item>
33      `;
34          const items = parseRssItems(xml, ['title', 'ht:approx_traffic', 'pubDate']);
35          expect(items).toEqual([
36              { title: 'AI', 'ht:approx_traffic': '500,000+', pubDate: 'Mon, 20 Mar 2026' },
37          ]);
38      });
39      it('returns empty string for missing fields', () => {
40          const xml = `<item><title>Test</title></item>`;
41          const items = parseRssItems(xml, ['title', 'missing']);
42          expect(items).toEqual([{ title: 'Test', missing: '' }]);
43      });
44      it('handles tags with attributes (e.g. <source url="...">)', () => {
45          const xml = `
46        <item>
47          <title><![CDATA[AI reshapes everything - Reuters]]></title>
48          <source url="https://reuters.com">Reuters</source>
49          <link>https://news.google.com/123</link>
50        </item>
51      `;
52          const items = parseRssItems(xml, ['title', 'source', 'link']);
53          expect(items).toEqual([
54              { title: 'AI reshapes everything - Reuters', source: 'Reuters', link: 'https://news.google.com/123' },
55          ]);
56      });
57      it('handles mixed CDATA and plain text in the same item', () => {
58          const xml = `
59        <item>
60          <title><![CDATA[Breaking: Major event]]></title>
61          <link>https://example.com/article</link>
62          <pubDate>Fri, 21 Mar 2026</pubDate>
63        </item>
64      `;
65          const items = parseRssItems(xml, ['title', 'link', 'pubDate']);
66          expect(items).toEqual([
67              { title: 'Breaking: Major event', link: 'https://example.com/article', pubDate: 'Fri, 21 Mar 2026' },
68          ]);
69      });
70      it('returns empty array for no items', () => {
71          const xml = `<channel><title>Empty</title></channel>`;
72          const items = parseRssItems(xml, ['title']);
73          expect(items).toEqual([]);
74      });
75  });