template.ts
/**
 * Pipeline template engine: ${{ ... }} expression rendering.
 */

import vm from 'node:vm';

import { isRecord } from '../utils.js';

/**
 * Values an expression may reference while a template is rendered.
 * Each field is exposed to expressions under its own name.
 */
export interface RenderContext {
  /** Exposed as `args`; treated as `{}` when absent. */
  args?: Record<string, unknown>;
  /** Exposed as `data`. */
  data?: unknown;
  /** Exposed as `root`. */
  root?: unknown;
  /** Exposed as `item`; also the implicit root for bare paths. Treated as `{}` when absent. */
  item?: unknown;
  /** Exposed as `index`; treated as `0` when absent. */
  index?: number;
}

/** A template that is exactly one `${{ ... }}` whose body contains no `}}`. */
const FULL_EXPR_RE = /^\$\{\{\s*([^}]*(?:\}[^}][^}]*)*)\s*\}\}$/;
/** A template that starts with `${{` and ends with `}}` (body may contain `}}`). */
const SINGLE_EXPR_RE = /^\$\{\{\s*([\s\S]*?)\s*\}\}$/;
/** Every `${{ ... }}` occurrence inside a larger string (body may span lines). */
const INTERPOLATION_RE = /\$\{\{\s*([\s\S]*?)\s*\}\}/g;

/**
 * Render a template value.
 *
 * - Non-string templates are returned untouched.
 * - A string that consists of a single `${{ expr }}` returns the raw value of
 *   the expression (so numbers, arrays, objects keep their type).
 * - Any other string has every `${{ expr }}` replaced by `String(value)`.
 *
 * @param template The template (only strings are interpreted).
 * @param ctx Values visible to expressions.
 * @returns The evaluated value or the interpolated string.
 */
export function render(template: unknown, ctx: RenderContext): unknown {
  if (typeof template !== 'string') return template;
  const trimmed = template.trim();

  // FULL_EXPR_RE cannot match across a `}}` boundary, so a hit here is always
  // one expression (e.g. "${{ a }}-${{ b }}" never matches).
  const fullMatch = FULL_EXPR_RE.exec(trimmed);
  if (fullMatch) return evalExpr(fullMatch[1].trim(), ctx);

  // Single expression whose body itself contains `}}` (e.g. nested object
  // literals). Reject it if another `${{` opens inside: that is interpolation.
  const singleExpr = SINGLE_EXPR_RE.exec(trimmed);
  if (singleExpr && !singleExpr[1].includes('${{')) {
    return evalExpr(singleExpr[1].trim(), ctx);
  }

  // Mixed text: interpolate each expression. The body pattern spans newlines
  // so multi-line expressions behave the same as in the single-expression path.
  return template.replace(INTERPOLATION_RE, (_m, expr: string) => String(evalExpr(expr.trim(), ctx)));
}

/**
 * A lone quoted string literal: the closing quote is the first unescaped
 * occurrence of the opening quote. `'a' + 'b'` therefore does NOT match and
 * is left for the JS evaluator.
 */
const STRING_LITERAL_RE = /^(['"])((?:\\.|(?!\1)[^\\\r\n\u2028\u2029])*)\1$/;

/** Original pipe rule: a `|` that is neither preceded nor followed by `|`. */
const NAIVE_PIPE_RE = /(?<!\|)\|(?!\|)/;

/**
 * Split `expr | filter1(arg) | filter2` into trimmed segments.
 *
 * Only a single `|` (not `||`) that sits outside quotes and outside any
 * bracket pair is a separator, so `replace('|', '-')` and `'a|b'` stay intact.
 * If quotes or brackets are unbalanced (e.g. an apostrophe in an unquoted
 * filter argument) the plain regex rule is used instead.
 */
function splitPipeSegments(expr: string): string[] {
  const segments: string[] = [];
  let quote: string | null = null;
  let depth = 0;
  let start = 0;

  for (let i = 0; i < expr.length; i++) {
    const ch = expr[i];
    if (quote !== null) {
      if (ch === '\\') i++; // skip the escaped character
      else if (ch === quote) quote = null;
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch;
    } else if (ch === '(' || ch === '[' || ch === '{') {
      depth++;
    } else if (ch === ')' || ch === ']' || ch === '}') {
      if (depth > 0) depth--;
    } else if (ch === '|' && depth === 0 && expr[i + 1] !== '|' && expr[i - 1] !== '|') {
      segments.push(expr.slice(start, i).trim());
      start = i + 1;
    }
  }

  if (quote !== null || depth !== 0) {
    return expr.split(NAIVE_PIPE_RE).map(s => s.trim());
  }
  segments.push(expr.slice(start).trim());
  return segments;
}

/**
 * Evaluate one expression (the text between `${{` and `}}`).
 *
 * Resolution order: pipe filters, quoted string literal, unsigned numeric
 * literal, dotted path lookup, and finally evaluation as JS in the VM.
 *
 * @param expr Expression source, already trimmed by the caller.
 * @param ctx Values visible to the expression.
 * @returns The expression value; `undefined` when the JS fallback fails or is rejected.
 */
export function evalExpr(expr: string, ctx: RenderContext): unknown {
  // ── Pipe filters: expr | filter1(arg) | filter2 ──
  const segments = splitPipeSegments(expr);
  if (segments.length > 1) {
    let result = evalExpr(segments[0], ctx);
    for (const filter of segments.slice(1)) {
      result = applyFilter(filter, result);
    }
    return result;
  }

  // Fast path: quoted string literal — skip VM overhead. The body is returned
  // verbatim (escape sequences are not interpreted).
  const strLit = STRING_LITERAL_RE.exec(expr);
  if (strLit) return strLit[2];

  // Fast path: numeric literal
  if (/^\d+(\.\d+)?$/.test(expr)) return Number(expr);

  const scope: RenderContext = {
    args: ctx.args ?? {},
    item: ctx.item ?? {},
    data: ctx.data,
    root: ctx.root,
    index: ctx.index ?? 0,
  };

  // Try resolving as a simple dotted path (item.foo.bar, args.limit, index)
  const resolved = resolvePath(expr, scope);
  if (resolved !== null && resolved !== undefined) return resolved;

  // Fallback: evaluate as JS in the VM.
  // Handles ||, ??, arithmetic, ternary, method calls, etc. natively.
  return evalJsExpr(expr, scope);
}

/**
 * Split the raw argument list of a filter on commas that are outside quotes,
 * then trim each argument and strip one leading / trailing quote character.
 *
 * A quote only opens a quoted argument when it is the first non-blank
 * character of that argument, so apostrophes inside unquoted words are
 * ordinary characters. If a quoted argument is never closed, the list is
 * split on every comma instead.
 */
function splitFilterArgs(rawArgs: string): string[] {
  let parts: string[] = [];
  let quote: string | null = null;
  let atArgStart = true;
  let start = 0;

  for (let i = 0; i < rawArgs.length; i++) {
    const ch = rawArgs[i];
    if (quote !== null) {
      if (ch === quote) quote = null;
      continue;
    }
    if (ch === ',') {
      parts.push(rawArgs.slice(start, i));
      start = i + 1;
      atArgStart = true;
      continue;
    }
    if (atArgStart && (ch === '"' || ch === "'")) quote = ch;
    if (ch.trim() !== '') atArgStart = false;
  }
  parts.push(rawArgs.slice(start));

  if (quote !== null) parts = rawArgs.split(',');
  return parts.map(s => s.trim().replace(/^['"]|['"]$/g, ''));
}

/**
 * Apply a named filter to a value.
 *
 * Supported filters:
 *   default(val), join(sep), upper, lower, trim, truncate(n),
 *   replace(old,new), keys, length, first, last, json,
 *   slugify, sanitize, ext, basename, urlencode, urldecode
 *
 * Unknown filters, malformed filter syntax, and values of a type the filter
 * does not handle all return `value` unchanged.
 *
 * @param filterExpr Filter source, e.g. `join(', ')` or `upper`.
 * @param value The value flowing through the pipe.
 */
function applyFilter(filterExpr: string, value: unknown): unknown {
  const match = filterExpr.match(/^(\w+)(?:\((.+)\))?$/);
  if (!match) return value;
  const [, name, rawArgs] = match;
  // Single-argument view: the whole argument text minus one outer quote pair.
  const filterArg = rawArgs?.replace(/^['"]|['"]$/g, '') ?? '';

  switch (name) {
    case 'default': {
      if (value === null || value === undefined || value === '') {
        // Integer-looking defaults are returned as numbers.
        const intVal = parseInt(filterArg, 10);
        if (!Number.isNaN(intVal) && String(intVal) === filterArg.trim()) return intVal;
        return filterArg;
      }
      return value;
    }
    case 'join':
      return Array.isArray(value) ? value.join(filterArg || ', ') : value;
    case 'upper':
      return typeof value === 'string' ? value.toUpperCase() : value;
    case 'lower':
      return typeof value === 'string' ? value.toLowerCase() : value;
    case 'trim':
      return typeof value === 'string' ? value.trim() : value;
    case 'truncate': {
      // Missing, non-numeric or zero length falls back to 50.
      const n = parseInt(filterArg, 10) || 50;
      return typeof value === 'string' && value.length > n ? `${value.slice(0, n)}...` : value;
    }
    case 'replace': {
      if (typeof value !== 'string') return value;
      // Quote-aware split so `replace(',', ' ')` sees "," as the first argument.
      const parts = rawArgs === undefined ? [] : splitFilterArgs(rawArgs);
      return parts.length >= 2 ? value.replaceAll(parts[0], parts[1]) : value;
    }
    case 'keys':
      return value && typeof value === 'object' ? Object.keys(value) : value;
    case 'length':
      return Array.isArray(value) ? value.length : typeof value === 'string' ? value.length : value;
    case 'first':
      return Array.isArray(value) ? value[0] : value;
    case 'last':
      return Array.isArray(value) ? value[value.length - 1] : value;
    case 'json':
      return JSON.stringify(value ?? null);
    case 'slugify':
      // Convert to URL-safe slug
      return typeof value === 'string'
        ? value
            .toLowerCase()
            .replace(/[^\p{L}\p{N}]+/gu, '-')
            .replace(/^-|-$/g, '')
        : value;
    case 'sanitize':
      // Remove invalid filename characters
      return typeof value === 'string'
        // biome-ignore lint/suspicious/noControlCharactersInRegex: intentional - strips C0 control chars from filenames
        ? value.replace(/[<>:"/\\|?*\x00-\x1f]/g, '_')
        : value;
    case 'ext': {
      // Extract file extension from URL or path
      if (typeof value !== 'string') return value;
      const lastDot = value.lastIndexOf('.');
      const lastSlash = Math.max(value.lastIndexOf('/'), value.lastIndexOf('\\'));
      return lastDot > lastSlash ? value.slice(lastDot) : '';
    }
    case 'basename': {
      // Extract filename from URL or path
      if (typeof value !== 'string') return value;
      const parts = value.split(/[/\\]/);
      return parts[parts.length - 1] || value;
    }
    case 'urlencode': {
      if (typeof value !== 'string') return value;
      try {
        return encodeURIComponent(value);
      } catch {
        // URIError on lone surrogates: pass the value through like other
        // filters do for input they cannot handle.
        return value;
      }
    }
    case 'urldecode': {
      if (typeof value !== 'string') return value;
      try {
        return decodeURIComponent(value);
      } catch {
        // URIError on malformed percent sequences (e.g. "100%"): pass through
        // instead of aborting the whole render.
        return value;
      }
    }
    default:
      return value;
  }
}

/**
 * Keys that the dotted-path fast path only follows when they are own
 * properties, so it cannot hand out inherited `constructor` / `__proto__`
 * values that the JS fallback deliberately refuses to evaluate.
 */
const INHERITED_ONLY_KEYS = new Set(['__proto__', 'constructor', 'prototype']);

/**
 * Resolve a dotted path such as `item.foo.bar`, `args.limit` or `index`.
 *
 * The first segment selects the root (`args`, `item`, `data`, `root`,
 * `index`); any other first segment is looked up on `item`. Numeric segments
 * index into arrays.
 *
 * @param pathStr Dotted path (split on `.` only; no bracket syntax).
 * @param ctx Values to resolve against.
 * @returns The resolved value (possibly `undefined`), or `null` when the path
 *          walks through something that is neither a record nor an array.
 */
export function resolvePath(pathStr: string, ctx: RenderContext): unknown {
  const parts = pathStr.split('.');
  const rootName = parts[0];
  let obj: unknown;
  let rest = parts.slice(1);

  switch (rootName) {
    case 'args':
      obj = ctx.args ?? {};
      break;
    case 'item':
      obj = ctx.item ?? {};
      break;
    case 'data':
      obj = ctx.data;
      break;
    case 'root':
      obj = ctx.root;
      break;
    case 'index':
      return ctx.index ?? 0;
    default:
      // Bare path: the first segment is itself a key of `item`.
      obj = ctx.item ?? {};
      rest = parts;
  }

  for (const part of rest) {
    if (isRecord(obj)) {
      if (INHERITED_ONLY_KEYS.has(part) && !Object.prototype.hasOwnProperty.call(obj, part)) {
        return null;
      }
      obj = obj[part];
    } else if (Array.isArray(obj) && /^\d+$/.test(part)) {
      obj = obj[parseInt(part, 10)];
    } else {
      return null;
    }
  }
  return obj;
}

/**
 * Identifiers rejected outright by the JS fallback. This is a best-effort
 * textual blocklist against obvious sandbox escapes; it does not catch
 * computed property names.
 */
const FORBIDDEN_EXPR_PATTERNS = /\b(constructor|__proto__|prototype|globalThis|process|require|import|eval)\b/;

/**
 * Cache serialized JSON strings (not parsed objects) by source reference.
 * Caching the parsed object would be unsafe: the VM sandbox could mutate it,
 * and the polluted version would leak to subsequent calls. By caching the
 * string and returning a fresh JSON.parse() each time, every evaluation gets
 * its own clean deep-copy while still avoiding redundant JSON.stringify()
 * for the same source object across loop iterations. The WeakMap lets entries
 * be GC'd when the source object is no longer referenced.
 *
 * NOTE(review): the cache is keyed by identity, so a source object that is
 * mutated in place after its first evaluation keeps serving the old snapshot.
 */
const _sanitizeCache = new WeakMap<object, string>();

/**
 * Deep-copy a context value through a JSON round-trip so the sandbox receives
 * plain data only: functions, accessors and class instances do not survive.
 * Primitives are returned as-is; values that cannot be serialized become `{}`.
 *
 * The copies are still ordinary host objects (they inherit from the host
 * `Object.prototype`), which is why FORBIDDEN_EXPR_PATTERNS is checked first.
 */
function sanitizeContext(obj: unknown): unknown {
  if (obj === null || obj === undefined) return obj;
  if (typeof obj !== 'object' && typeof obj !== 'function') return obj;
  const objRef = obj as object;
  const cached = _sanitizeCache.get(objRef);
  if (cached !== undefined) return JSON.parse(cached);
  try {
    const jsonStr: unknown = JSON.stringify(obj);
    // JSON.stringify yields undefined for functions: nothing to copy.
    if (typeof jsonStr !== 'string') return {};
    _sanitizeCache.set(objRef, jsonStr);
    return JSON.parse(jsonStr);
  } catch {
    // Circular structures, BigInt, throwing toJSON, ...
    return {};
  }
}

/** LRU-bounded cache for compiled VM scripts — prevents unbounded memory growth. */
const MAX_VM_CACHE_SIZE = 256;
const _vmCache = new Map<string, vm.Script>();

/**
 * Return the compiled script for `expr`, compiling and caching it on a miss.
 * Map iteration order is insertion order, so a hit is re-inserted to mark it
 * most recently used and the first key is always the eviction candidate.
 *
 * @throws SyntaxError when `expr` is not a valid JS expression.
 */
function getOrCompileScript(expr: string): vm.Script {
  const cached = _vmCache.get(expr);
  if (cached) {
    _vmCache.delete(expr);
    _vmCache.set(expr, cached);
    return cached;
  }

  // Evict least recently used entry when cache is full
  if (_vmCache.size >= MAX_VM_CACHE_SIZE) {
    const oldestKey = _vmCache.keys().next().value;
    if (oldestKey !== undefined) _vmCache.delete(oldestKey);
  }

  const script = new vm.Script(`(${expr})`);
  _vmCache.set(expr, script);
  return script;
}

/** Host builtins exposed to expressions; re-applied before every evaluation. */
const SANDBOX_BUILTINS: Readonly<Record<string, unknown>> = {
  encodeURIComponent,
  decodeURIComponent,
  JSON,
  Math,
  Number,
  String,
  Boolean,
  Array,
  Date,
};

/** Properties that are part of the sandbox's initial shape and safe to keep. */
const SANDBOX_WHITELIST = new Set([
  'args', 'item', 'data', 'root', 'index',
  ...Object.keys(SANDBOX_BUILTINS),
]);

/**
 * Reusable VM sandbox context.
 *
 * vm.createContext() is expensive because it creates a new V8 context with
 * its own global object. In pipeline loops (map/filter over hundreds of
 * items), this adds up to significant overhead.
 *
 * Instead, we create the context once and mutate the sandbox properties
 * before each evaluation:
 *   1. Data properties are deep-copied (sanitizeContext) before assignment
 *   2. Scripts run with a 50ms timeout
 *   3. codeGeneration is disabled for the context
 *
 * NOTE(security): the builtins above are the host's own objects, and Node
 * documents that `node:vm` is not a security mechanism. Treat this as damage
 * limitation for trusted-ish templates, not as isolation for hostile input.
 */
let _reusableSandbox: Record<string, unknown> | null = null;
let _reusableContext: vm.Context | null = null;

function getReusableContext(): { sandbox: Record<string, unknown>; context: vm.Context } {
  if (_reusableSandbox && _reusableContext) {
    return { sandbox: _reusableSandbox, context: _reusableContext };
  }
  _reusableSandbox = {
    args: {},
    item: {},
    data: null,
    root: null,
    index: 0,
    ...SANDBOX_BUILTINS,
  };
  _reusableContext = vm.createContext(_reusableSandbox, {
    codeGeneration: { strings: false, wasm: false },
  });
  return { sandbox: _reusableSandbox, context: _reusableContext };
}

/**
 * Evaluate arbitrary JS expressions as a last-resort fallback.
 * Runs inside the shared `node:vm` context with dynamic code generation
 * disabled. Compiled scripts are cached by expression string — critical for
 * loops where the same expression is evaluated hundreds of times.
 *
 * @returns The expression value, or `undefined` when the expression is longer
 *          than 2000 characters, matches FORBIDDEN_EXPR_PATTERNS, fails to
 *          compile, throws, or exceeds the timeout.
 */
function evalJsExpr(expr: string, ctx: RenderContext): unknown {
  // Guard against absurdly long expressions that could indicate injection.
  if (expr.length > 2000) return undefined;

  // Block obvious sandbox escape attempts.
  if (FORBIDDEN_EXPR_PATTERNS.test(expr)) return undefined;

  try {
    const script = getOrCompileScript(expr);
    const { sandbox, context } = getReusableContext();

    // Clean non-whitelisted properties that a previous script may have added.
    // Without this, `${{ x = 42 }}` would leak `x` into subsequent evaluations.
    for (const key of Object.keys(sandbox)) {
      if (!SANDBOX_WHITELIST.has(key)) {
        delete sandbox[key];
      }
    }

    // A previous script may also have reassigned a whitelisted builtin
    // (`${{ Math = 0 }}`); put the originals back so it cannot persist.
    Object.assign(sandbox, SANDBOX_BUILTINS);

    // Refresh the data properties with fresh plain-data copies.
    sandbox.args = sanitizeContext(ctx.args ?? {});
    sandbox.item = sanitizeContext(ctx.item ?? {});
    sandbox.data = sanitizeContext(ctx.data);
    sandbox.root = sanitizeContext(ctx.root);
    sandbox.index = ctx.index ?? 0;
    return script.runInContext(context, { timeout: 50 });
  } catch {
    // Deliberate best-effort: a broken expression renders as undefined.
    return undefined;
  }
}

/**
 * Normalize JavaScript source for browser evaluate() calls.
 *
 * Source that already is a function (arrow function, `function` expression or
 * declaration, optionally `async`) is returned trimmed; anything else —
 * including a parenthesized IIFE — is wrapped as `() => (source)`. Empty
 * source becomes `() => undefined`.
 *
 * @param source Raw JS source text.
 * @returns Source text of a function.
 */
export function normalizeEvaluateSource(source: string): string {
  const stripped = source.trim();
  if (!stripped) return '() => undefined';
  if (stripped.startsWith('(') && stripped.endsWith(')()')) return `() => (${stripped})`;
  if (/^(async\s+)?\([^)]*\)\s*=>/.test(stripped)) return stripped;
  if (/^(async\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=>/.test(stripped)) return stripped;
  // `\b` also accepts `function(){...}` written without a space after the keyword.
  if (/^(?:async\s+)?function\b/.test(stripped)) return stripped;
  return `() => (${stripped})`;
}