// utils.js
import { CliError } from '@jackwener/opencli/errors';

/**
 * Bloomberg RSS feed URLs, keyed by the short feed name accepted by
 * `fetchBloombergFeed`.
 */
export const BLOOMBERG_FEEDS = {
    main: 'https://feeds.bloomberg.com/news.rss',
    markets: 'https://feeds.bloomberg.com/markets/news.rss',
    economics: 'https://feeds.bloomberg.com/economics/news.rss',
    industries: 'https://feeds.bloomberg.com/industries/news.rss',
    tech: 'https://feeds.bloomberg.com/technology/news.rss',
    politics: 'https://feeds.bloomberg.com/politics/news.rss',
    businessweek: 'https://feeds.bloomberg.com/businessweek/news.rss',
    opinions: 'https://feeds.bloomberg.com/bview/news.rss',
};

/** User-Agent header sent with every RSS request. */
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (compatible; opencli)';

/** Upper bound on the number of items `fetchBloombergFeed` returns. */
const MAX_FEED_ITEMS = 20;

/** Largest valid Unicode code point; anything above makes String.fromCodePoint throw. */
const MAX_CODE_POINT = 0x10ffff;

/** Named entities understood by `decodeXmlEntities`, mapped to their replacement text. */
const NAMED_XML_ENTITIES = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    nbsp: ' ',
};

/**
 * Matches one entity reference: decimal (group 1), hexadecimal (group 2) or
 * one of the supported named entities (group 3).
 */
const XML_ENTITY_PATTERN = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g;

/** Inline node types that fall back to their (stringified) `value` when they have no children. */
const INLINE_WRAPPER_TYPES = new Set([
    'link',
    'entity',
    'strong',
    'emphasis',
    'italic',
    'underline',
    'span',
]);

/**
 * Downloads one of the known Bloomberg RSS feeds and returns its first items.
 *
 * @param {string} name - Key of `BLOOMBERG_FEEDS`.
 * @param {number|string} [limit=1] - Desired item count; clamped to 1..20,
 *   non-numeric values fall back to 1.
 * @returns {Promise<Array<{title: string, summary: string, link: string, mediaLinks: string[]}>>}
 * @throws {CliError} ARGUMENT for an unknown feed name, FETCH_ERROR for a
 *   non-2xx response, NOT_FOUND when no item could be parsed.
 */
export async function fetchBloombergFeed(name, limit = 1) {
    // Own-property lookup: a plain `BLOOMBERG_FEEDS[name]` would accept
    // inherited keys such as "constructor" and hand a function to fetch().
    const feedUrl = Object.hasOwn(BLOOMBERG_FEEDS, name) ? BLOOMBERG_FEEDS[name] : undefined;
    if (!feedUrl) {
        throw new CliError('ARGUMENT', `Unknown Bloomberg feed: ${name}`);
    }
    const resp = await fetch(feedUrl, {
        headers: { 'User-Agent': DEFAULT_USER_AGENT },
    });
    if (!resp.ok) {
        throw new CliError('FETCH_ERROR', `Bloomberg RSS HTTP ${resp.status}`, 'Bloomberg may be temporarily unavailable; try again later.');
    }
    const xml = await resp.text();
    const items = parseBloombergRss(xml);
    if (!items.length) {
        throw new CliError('NOT_FOUND', 'Bloomberg RSS feed returned no items', 'Bloomberg may have changed the feed format.');
    }
    const count = Math.max(1, Math.min(Number(limit) || 1, MAX_FEED_ITEMS));
    return items.slice(0, count);
}

/**
 * Extracts the `<item>` entries of an RSS document with regular expressions.
 * Items lacking a title or a link (`<link>`, falling back to `<guid>`) are skipped.
 *
 * @param {string} xml - Raw RSS document.
 * @returns {Array<{title: string, summary: string, link: string, mediaLinks: string[]}>}
 */
export function parseBloombergRss(xml) {
    const items = [];
    const itemRegex = /<item\b[^>]*>([\s\S]*?)<\/item>/gi;
    for (const [, block] of String(xml ?? '').matchAll(itemRegex)) {
        const title = extractTagText(block, 'title');
        const link = extractTagText(block, 'link') || extractTagText(block, 'guid');
        if (!title || !link) {
            continue;
        }
        items.push({
            title,
            summary: extractTagText(block, 'description'),
            link,
            mediaLinks: extractMediaLinksFromRssItem(block),
        });
    }
    return items;
}

/**
 * Trims a user-supplied link and expands relative forms to absolute URLs.
 * Root-relative paths are resolved against https://www.bloomberg.com and
 * protocol-relative links (`//host/path`) get the https scheme.
 *
 * @param {*} input - Link as typed by the user.
 * @returns {string} The trimmed, possibly expanded link (not otherwise validated).
 * @throws {CliError} ARGUMENT when the input is empty.
 */
export function normalizeBloombergLink(input) {
    const raw = String(input || '').trim();
    if (!raw) {
        throw new CliError('ARGUMENT', 'A Bloomberg link is required');
    }
    // Must be tested before the single-slash case, otherwise "//host/x" would
    // be glued onto the Bloomberg origin as a bogus path.
    if (raw.startsWith('//')) {
        return `https:${raw}`;
    }
    if (raw.startsWith('/')) {
        return `https://www.bloomberg.com${raw}`;
    }
    return raw;
}

/**
 * Normalizes a link and verifies it is an http(s) URL on bloomberg.com or one
 * of its subdomains.
 *
 * @param {*} input - Link as typed by the user.
 * @returns {string} The serialized, validated URL.
 * @throws {CliError} ARGUMENT when the link is empty, unparsable, uses a
 *   scheme other than http(s), or points at another host.
 */
export function validateBloombergLink(input) {
    const normalized = normalizeBloombergLink(input);
    let url;
    try {
        url = new URL(normalized);
    }
    catch {
        throw new CliError('ARGUMENT', `Invalid Bloomberg link: ${input}`, 'Pass a full https://www.bloomberg.com/... URL or a relative Bloomberg path.');
    }
    // A bloomberg.com hostname alone is not enough: ftp:// or file:// URLs can carry one too.
    if (url.protocol !== 'https:' && url.protocol !== 'http:') {
        throw new CliError('ARGUMENT', `Expected an http(s) Bloomberg link, got: ${url.protocol}`, 'Pass a full https://www.bloomberg.com/... URL or a relative Bloomberg path.');
    }
    if (!/(?:\.|^)bloomberg\.com$/i.test(url.hostname)) {
        throw new CliError('ARGUMENT', `Expected a bloomberg.com link, got: ${url.hostname}`, 'Pass a Bloomberg article URL from bloomberg.com.');
    }
    return url.toString();
}

/**
 * Renders a story body (an object whose `content` is an array of blocks) as
 * Markdown-like plain text, with blocks separated by one blank line.
 *
 * @param {{content?: Array<object>}|null|undefined} body
 * @returns {string} Rendered text; empty when there is nothing to render.
 */
export function renderStoryBody(body) {
    const blocks = Array.isArray(body?.content) ? body.content : [];
    const parts = blocks
        .map((block) => normalizeBlockText(renderBlock(block, 0)))
        .filter(Boolean);
    return parts.join('\n\n').replace(/\n{3,}/g, '\n\n').trim();
}

/**
 * Collects the distinct media URLs referenced by a story: lede and social
 * images, attachments, and the `media` blocks of the body.
 *
 * @param {object|null|undefined} story
 * @returns {string[]} Unique URLs in discovery order.
 */
export function extractStoryMediaLinks(story) {
    const urls = new Set();
    collectMediaUrls(story?.ledeImageUrl, urls);
    collectMediaUrls(story?.socialImageUrl, urls);
    collectMediaUrls(story?.lede, urls);
    collectMediaUrls(story?.imageAttachments, urls);
    collectMediaUrls(story?.videoAttachments, urls);
    const mediaBlocks = Array.isArray(story?.body?.content)
        ? story.body.content.filter((block) => block?.type === 'media')
        : [];
    collectMediaUrls(mediaBlocks, urls);
    return [...urls];
}

/**
 * Renders one body block according to its `type`.
 *
 * @param {object} block - Body block.
 * @param {number} depth - Current list nesting depth.
 * @returns {string} Rendered text, or '' for skipped/empty blocks.
 */
function renderBlock(block, depth) {
    if (!block || typeof block !== 'object') {
        return '';
    }
    switch (block.type) {
        case 'paragraph':
            return renderInlineNodes(block.content || []);
        case 'heading': {
            const text = renderInlineNodes(block.content || []);
            if (!text) {
                return '';
            }
            const level = Number(block.data?.level ?? block.data?.weight ?? 2);
            const prefix = level <= 1 ? '# ' : level === 2 ? '## ' : '### ';
            return `${prefix}${text}`;
        }
        case 'blockquote': {
            const text = renderInlineNodes(block.content || []);
            if (!text) {
                return '';
            }
            return text.split('\n').map((line) => line ? `> ${line}` : '>').join('\n');
        }
        case 'list':
            return renderListBlock(block, depth);
        case 'tabularData':
            return renderTabularDataBlock(block);
        case 'media':
            return renderMediaBlock(block);
        // Promotional blocks carry no story text.
        case 'inline-newsletter':
        case 'newsletter':
        case 'ad':
            return '';
        default: {
            // Unknown block type: try inline rendering first, then nested
            // blocks, and finally scrape whatever strings the block holds.
            if (Array.isArray(block.content) && block.content.length > 0) {
                const inlineText = renderInlineNodes(block.content);
                if (inlineText) {
                    return inlineText;
                }
                const nested = block.content.map((child) => renderBlock(child, depth + 1)).filter(Boolean);
                if (nested.length) {
                    return nested.join('\n');
                }
            }
            return extractGenericText(block);
        }
    }
}

/**
 * Concatenates the rendered text of a list of inline nodes.
 *
 * @param {Array<object|string>} nodes
 * @returns {string} '' when `nodes` is not an array.
 */
function renderInlineNodes(nodes) {
    if (!Array.isArray(nodes)) {
        return '';
    }
    return nodes.map((node) => renderInlineNode(node)).join('');
}

/**
 * Renders a single inline node (text, line break, or a wrapper such as
 * link/strong) to plain text; formatting marks are dropped.
 *
 * @param {object|string|null|undefined} node
 * @returns {string}
 */
function renderInlineNode(node) {
    if (node == null) {
        return '';
    }
    if (typeof node === 'string') {
        return decodeXmlEntities(node);
    }
    if (node.type === 'linebreak') {
        return '\n';
    }
    if (node.type === 'text') {
        return decodeXmlEntities(node.value || '');
    }
    if (Array.isArray(node.content) && node.content.length > 0) {
        return renderInlineNodes(node.content);
    }
    if (INLINE_WRAPPER_TYPES.has(node.type)) {
        return decodeXmlEntities(node.value || '');
    }
    // Unknown childless node: only a string value is trusted as text.
    return typeof node.value === 'string' ? decodeXmlEntities(node.value) : '';
}

/**
 * Renders a list block as "- " bullets, or "1. " style numbering when the
 * block's style (subType / data.style / data.listType) says ordered/numbered.
 *
 * @param {object} block - List block.
 * @param {number} depth - Nesting depth of this list.
 * @returns {string}
 */
function renderListBlock(block, depth) {
    const items = Array.isArray(block.content) ? block.content : [];
    if (!items.length) {
        return '';
    }
    const listStyle = String(block.subType || block.data?.style || block.data?.listType || '');
    const ordered = /\bordered\b|\bnumber(?:ed)?\b/i.test(listStyle);
    const lines = [];
    let index = 1;
    for (const item of items) {
        const prefix = ordered ? `${index}. ` : '- ';
        const rendered = renderListItem(item, prefix, depth);
        if (!rendered) {
            continue;
        }
        lines.push(rendered);
        // Advance only for items that produced output, so skipped (empty)
        // items do not leave gaps in the numbering.
        index += 1;
    }
    return lines.join('\n');
}

/**
 * Renders one list item; continuation lines are aligned under the item text.
 *
 * @param {object} item - List item node.
 * @param {string} prefix - Bullet or number prefix, including trailing space.
 * @param {number} depth - Nesting depth of the enclosing list.
 * @returns {string} '' when the item has no text.
 */
function renderListItem(item, prefix, depth) {
    const indent = ' '.repeat(depth);
    const body = normalizeBlockText(renderListItemBody(item, depth + 1));
    if (!body) {
        return '';
    }
    const [firstLine, ...rest] = body.split('\n');
    const head = `${indent}${prefix}${firstLine}`;
    if (!rest.length) {
        return head;
    }
    const continuationIndent = `${indent}${' '.repeat(prefix.length)}`;
    const tail = rest.map((line) => `${continuationIndent}${line}`).join('\n');
    return `${head}\n${tail}`;
}

/**
 * Renders the content of a list item. A `list-item` node has each child
 * rendered on its own line; anything else is rendered as a regular block.
 *
 * @param {object} item
 * @param {number} depth - Depth passed on to nested blocks.
 * @returns {string}
 */
function renderListItemBody(item, depth) {
    if (!item || typeof item !== 'object') {
        return '';
    }
    if (item.type === 'list-item' && Array.isArray(item.content)) {
        const parts = item.content
            .map((child) => child?.type === 'paragraph'
                ? renderInlineNodes(child.content || [])
                : renderBlock(child, depth))
            .map((part) => normalizeBlockText(part))
            .filter(Boolean);
        return parts.join('\n');
    }
    return renderBlock(item, depth);
}

/**
 * Renders a tabular block as one line of text per row. Rows are looked up in
 * `data.rows`, `data.table.rows`, then `content`; without rows the block's
 * strings are flattened into a single line.
 *
 * @param {object} block
 * @returns {string}
 */
function renderTabularDataBlock(block) {
    const rows = block?.data?.rows ?? block?.data?.table?.rows ?? block?.content;
    if (!Array.isArray(rows) || !rows.length) {
        return extractGenericText(block?.data || block?.content || block);
    }
    const lines = rows
        .map((row) => normalizeBlockText(extractGenericText(row)))
        .filter(Boolean);
    return lines.join('\n');
}

/**
 * Returns the first non-empty caption/title found on a media block, with HTML
 * tags removed.
 *
 * @param {object} block
 * @returns {string} '' when the block has no caption.
 */
function renderMediaBlock(block) {
    const candidates = [
        block?.data?.chart?.caption,
        block?.data?.attachment?.caption,
        block?.data?.attachment?.title,
        block?.data?.attachment?.subtitle,
        block?.data?.video?.caption,
    ];
    const caption = candidates
        .map((value) => normalizeBlockText(stripHtml(String(value || ''))))
        .find(Boolean);
    return caption || '';
}

/**
 * Flattens every string reachable from `value` into one whitespace-collapsed line.
 *
 * @param {*} value
 * @returns {string}
 */
function extractGenericText(value) {
    const parts = [];
    collectText(value, parts);
    return parts.join(' ').replace(/\s+/g, ' ').trim();
}

/**
 * Depth-first walk that appends cleaned text fragments to `out`. For objects,
 * a string `value` wins, then a `content` array, then all property values.
 *
 * @param {*} value
 * @param {string[]} out - Accumulator, mutated in place.
 */
function collectText(value, out) {
    if (value == null) {
        return;
    }
    if (typeof value === 'string') {
        const text = normalizeBlockText(stripHtml(decodeXmlEntities(value)));
        if (text) {
            out.push(text);
        }
        return;
    }
    if (Array.isArray(value)) {
        for (const item of value) {
            collectText(item, out);
        }
        return;
    }
    if (typeof value !== 'object') {
        return;
    }
    if (typeof value.value === 'string') {
        collectText(value.value, out);
        return;
    }
    if (Array.isArray(value.content)) {
        collectText(value.content, out);
        return;
    }
    for (const entry of Object.values(value)) {
        collectText(entry, out);
    }
}

/**
 * Returns the cleaned text of the first `<tag>…</tag>` element in an XML
 * fragment: CDATA unwrapped, entities decoded, markup stripped.
 *
 * @param {string} block - XML fragment.
 * @param {string} tag - Element name.
 * @returns {string} '' when the element is absent.
 */
function extractTagText(block, tag) {
    const safeTag = escapeRegExp(tag);
    const match = block.match(new RegExp(`<${safeTag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${safeTag}>`, 'i'));
    if (!match) {
        return '';
    }
    return normalizeBlockText(stripHtml(decodeXmlEntities(stripCdata(match[1]))));
}

/**
 * Collects the distinct `url` attributes of `<media:content>`,
 * `<media:thumbnail>` and `<enclosure>` elements in an RSS item.
 *
 * @param {string} block - Inner XML of an `<item>`.
 * @returns {string[]} Unique URLs in document order.
 */
function extractMediaLinksFromRssItem(block) {
    const links = new Set();
    // XML allows either quote style around attribute values.
    const mediaRegex = /<(?:media:content|media:thumbnail|enclosure)\b[^>]*\burl=(?:"([^"]+)"|'([^']+)')[^>]*>/gi;
    for (const match of block.matchAll(mediaRegex)) {
        const url = decodeXmlEntities(match[1] ?? match[2] ?? '').trim();
        if (url) {
            links.add(url);
        }
    }
    return [...links];
}

/**
 * Recursively gathers media-looking URLs from strings, arrays and objects.
 *
 * @param {*} value - Value to scan.
 * @param {Set<string>} out - Receives the URLs found.
 * @param {WeakSet<object>} [seen] - Objects already visited (guards against cycles).
 */
function collectMediaUrls(value, out, seen = new WeakSet()) {
    if (value == null) {
        return;
    }
    if (typeof value === 'string') {
        const normalized = normalizeMediaUrl(value);
        if (normalized) {
            out.add(normalized);
        }
        return;
    }
    if (Array.isArray(value)) {
        for (const item of value) {
            collectMediaUrls(item, out, seen);
        }
        return;
    }
    if (typeof value !== 'object' || seen.has(value)) {
        return;
    }
    seen.add(value);
    // These keys are read first so their URLs precede those found by the
    // generic property walk below in the resulting order.
    for (const key of ['url', 'src', 'fallback', 'poster']) {
        const candidate = value[key];
        if (typeof candidate === 'string') {
            const normalized = normalizeMediaUrl(candidate);
            if (normalized) {
                out.add(normalized);
            }
        }
    }
    for (const entry of Object.values(value)) {
        collectMediaUrls(entry, out, seen);
    }
}

/**
 * Decodes and trims a candidate URL, keeping it only when it is an absolute
 * http(s) URL that looks like media.
 *
 * @param {*} value
 * @returns {string|null}
 */
function normalizeMediaUrl(value) {
    const url = decodeXmlEntities(String(value || '')).trim();
    if (!/^https?:\/\//i.test(url)) {
        return null;
    }
    return looksLikeMediaUrl(url) ? url : null;
}

/**
 * Heuristic: the URL mentions a known Bloomberg asset host or ends in a
 * common image/video extension (optionally followed by a query or fragment).
 *
 * @param {string} url
 * @returns {boolean}
 */
function looksLikeMediaUrl(url) {
    return /(?:assets\.bwbx\.io|resource\.bloomberg\.com|media\.bloomberg\.com)/i.test(url)
        || /\.(?:jpg|jpeg|png|webp|gif|svg|mp4|m3u8)(?:[?#].*)?$/i.test(url);
}

/**
 * Unwraps a value that consists of exactly one CDATA section.
 *
 * @param {string} value
 * @returns {string} The CDATA payload, or `value` unchanged.
 */
function stripCdata(value) {
    const match = value.match(/^<!\[CDATA\[([\s\S]*?)\]\]>$/);
    return match ? match[1] : value;
}

/**
 * Replaces every `<…>` tag with a single space.
 *
 * @param {*} value
 * @returns {string}
 */
function stripHtml(value) {
    return String(value || '').replace(/<[^>]+>/g, ' ');
}

/**
 * Unwraps CDATA sections and decodes numeric and common named entities.
 *
 * All entities are decoded in a single pass so already-decoded output is
 * never decoded again (`&amp;lt;` yields `&lt;`, not `<`). Numeric references
 * outside the Unicode range are left untouched instead of throwing.
 *
 * @param {*} value
 * @returns {string}
 */
function decodeXmlEntities(value) {
    return String(value || '')
        .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
        .replace(XML_ENTITY_PATTERN, (entity, decimal, hex, named) => {
            if (named !== undefined) {
                return NAMED_XML_ENTITIES[named];
            }
            const codePoint = decimal !== undefined ? Number(decimal) : Number.parseInt(hex, 16);
            const isValid = Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= MAX_CODE_POINT;
            return isValid ? String.fromCodePoint(codePoint) : entity;
        });
}

/**
 * Normalizes whitespace inside a block: drops carriage returns, trims blanks
 * around newlines, collapses runs of spaces/tabs and trims the result.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeBlockText(value) {
    return String(value || '')
        .replace(/\r/g, '')
        .replace(/[ \t]+\n/g, '\n')
        .replace(/\n[ \t]+/g, '\n')
        .replace(/[ \t]{2,}/g, ' ')
        .trim();
}

/**
 * Escapes regular-expression metacharacters so `value` matches literally.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}