// utils.ts
/**
 * Shared utility functions used across the codebase.
 */

import * as fs from 'node:fs';
import * as path from 'node:path';
import TurndownService from 'turndown';

/**
 * Type guard: checks if a value is a non-null, non-array object.
 *
 * Only `typeof`, `null` and `Array.isArray` are checked; the prototype is not
 * inspected, so any other object value (e.g. a class instance) also passes.
 *
 * @param value - The value to test.
 * @returns `true` when `value` is an object that is neither `null` nor an array.
 */
export function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Simple async concurrency limiter: maps `items` through `fn` with at most
 * `limit` calls in flight at any time.
 *
 * Each result is stored at the index of its input, so the output order matches
 * `items` regardless of the order in which the calls complete.
 *
 * If any call to `fn` rejects, the returned promise rejects with that error.
 * The worker that hit the error stops, but the other workers are not
 * cancelled and keep pulling the remaining items.
 *
 * @param items - Inputs to process. The array is not modified.
 * @param limit - Maximum number of concurrent `fn` calls. Fractions are rounded
 *   down; anything below 1 (including `NaN`) is treated as 1 so that every item
 *   is still processed.
 * @param fn - Async mapper invoked with each item and its index.
 * @returns The mapped results, in input order.
 */
export async function mapConcurrent<T, R>(
  items: readonly T[],
  limit: number,
  fn: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results = new Array<R>(items.length);

  // A single iterator shared by every worker acts as the work queue: each
  // `next()` hands out the following [index, item] pair exactly once.
  const queue = items.entries();

  async function worker(): Promise<void> {
    for (const [i, item] of queue) {
      results[i] = await fn(item, i);
    }
  }

  // Clamp to at least one worker. Previously a limit of 0, a negative number
  // or NaN spawned no workers and silently resolved with an array of holes.
  const requested = Math.floor(limit);
  const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);

  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}

/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay passed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Save a base64-encoded string to a file, creating parent directories as needed.
 *
 * The string is decoded with `Buffer.from(base64, 'base64')` exactly as given;
 * nothing is stripped or validated beforehand.
 *
 * @param base64 - Base64 payload to decode and write.
 * @param filePath - Destination path; its parent directory is created recursively.
 * @returns A promise that resolves once the file has been written. Errors from
 *   `mkdir` or `writeFile` propagate as rejections.
 */
export async function saveBase64ToFile(base64: string, filePath: string): Promise<void> {
  await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
  await fs.promises.writeFile(filePath, Buffer.from(base64, 'base64'));
}

/**
 * Create a Turndown converter with this codebase's defaults: ATX headings,
 * fenced code blocks and `-` as the bullet marker, plus a `linebreak` rule
 * that turns `<br>` into a single newline.
 *
 * @param configure - Optional hook called with the converter after the
 *   defaults are applied, so callers can add or override rules.
 * @returns The configured `TurndownService` instance.
 */
export function createMarkdownConverter(configure?: (td: TurndownService) => void): TurndownService {
  const td = new TurndownService({
    headingStyle: 'atx',
    codeBlockStyle: 'fenced',
    bulletListMarker: '-',
  });

  td.addRule('linebreak', {
    filter: 'br',
    replacement: () => '\n',
  });

  if (configure) configure(td);
  return td;
}

/**
 * Convert an HTML string to Markdown and normalise the whitespace of the result.
 *
 * A fresh converter is built via `createMarkdownConverter(configure)` on every
 * call. After conversion the text is cleaned up in this order:
 * 1. non-breaking spaces (U+00A0) become regular spaces;
 * 2. trailing spaces and tabs are removed from every line;
 * 3. runs of four or more newlines collapse to three (at most two blank lines);
 * 4. leading and trailing whitespace is trimmed.
 *
 * @param value - HTML to convert; a falsy value is treated as the empty string.
 * @param configure - Optional hook forwarded to `createMarkdownConverter`.
 * @returns The normalised Markdown text.
 */
export function htmlToMarkdown(value: string, configure?: (td: TurndownService) => void): string {
  return createMarkdownConverter(configure)
    .turndown(value || '')
    .replace(/\u00a0/g, ' ')
    // Strip trailing whitespace BEFORE collapsing newlines: otherwise lines that
    // contain only spaces/tabs break up the newline run and escape the collapse.
    .replace(/[ \t]+$/gm, '')
    .replace(/\n{4,}/g, '\n\n\n')
    .trim();
}