/ src / pipeline / template.ts
template.ts
  1  /**
  2   * Pipeline template engine: ${{ ... }} expression rendering.
  3   */
  4  
  5  import vm from 'node:vm';
  6  
/**
 * Values made available to `${{ ... }}` expressions while rendering.
 * Every field is optional; the evaluator substitutes defaults where noted.
 */
export interface RenderContext {
  /** Exposed to expressions as `args`; treated as `{}` when omitted. */
  args?: Record<string, unknown>;
  /** Exposed to expressions as `data`; passed through as-is (may be undefined). */
  data?: unknown;
  /** Exposed to expressions as `root`; passed through as-is (may be undefined). */
  root?: unknown;
  /**
   * Exposed to expressions as `item`; treated as `{}` when omitted.
   * Also the implicit root for bare paths: `title` resolves like `item.title`.
   */
  item?: unknown;
  /** Exposed to expressions as `index`; treated as `0` when omitted. */
  index?: number;
}
 14  
 15  import { isRecord } from '../utils.js';
 16  
 17  export function render(template: unknown, ctx: RenderContext): unknown {
 18    if (typeof template !== 'string') return template;
 19    const trimmed = template.trim();
 20    // Full expression: entire string is a single ${{ ... }}
 21    // Use [^}] to prevent matching across }} boundaries (e.g. "${{ a }}-${{ b }}")
 22    const fullMatch = trimmed.match(/^\$\{\{\s*([^}]*(?:\}[^}][^}]*)*)\s*\}\}$/);
 23    if (fullMatch && !trimmed.includes('}}-') && !trimmed.includes('}}${{')) return evalExpr(fullMatch[1].trim(), ctx);
 24    // Check if the entire string is a single expression (no other text around it)
 25    const singleExpr = trimmed.match(/^\$\{\{\s*([\s\S]*?)\s*\}\}$/);
 26    if (singleExpr) {
 27      // Verify it's truly a single expression (no other ${{ inside)
 28      const inner = singleExpr[1];
 29      if (!inner.includes('${{')) return evalExpr(inner.trim(), ctx);
 30    }
 31    return template.replace(/\$\{\{\s*(.*?)\s*\}\}/g, (_m, expr) => String(evalExpr(expr.trim(), ctx)));
 32  }
 33  
 34  export function evalExpr(expr: string, ctx: RenderContext): unknown {
 35    const args = ctx.args ?? {};
 36    const item = ctx.item ?? {};
 37    const data = ctx.data;
 38    const root = ctx.root;
 39    const index = ctx.index ?? 0;
 40  
 41    // ── Pipe filters: expr | filter1(arg) | filter2 ──
 42    // Split on single | (not ||) so "item.a || item.b | upper" works correctly.
 43    const pipeSegments = expr.split(/(?<!\|)\|(?!\|)/).map(s => s.trim());
 44    if (pipeSegments.length > 1) {
 45      let result = evalExpr(pipeSegments[0], ctx);
 46      for (let i = 1; i < pipeSegments.length; i++) {
 47        result = applyFilter(pipeSegments[i], result);
 48      }
 49      return result;
 50    }
 51  
 52    // Fast path: quoted string literal — skip VM overhead
 53    const strLit = expr.match(/^(['"])(.*)\1$/);
 54    if (strLit) return strLit[2];
 55  
 56    // Fast path: numeric literal
 57    if (/^\d+(\.\d+)?$/.test(expr)) return Number(expr);
 58  
 59    // Try resolving as a simple dotted path (item.foo.bar, args.limit, index)
 60    const resolved = resolvePath(expr, { args, item, data, root, index });
 61    if (resolved !== null && resolved !== undefined) return resolved;
 62  
 63    // Fallback: evaluate as JS in a sandboxed VM.
 64    // Handles ||, ??, arithmetic, ternary, method calls, etc. natively.
 65    return evalJsExpr(expr, { args, item, data, root, index });
 66  }
 67  
 68  /**
 69   * Apply a named filter to a value.
 70   * Supported filters:
 71   *   default(val), join(sep), upper, lower, truncate(n), trim,
 72   *   replace(old,new), keys, length, first, last, json
 73   */
 74  function applyFilter(filterExpr: string, value: unknown): unknown {
 75    const match = filterExpr.match(/^(\w+)(?:\((.+)\))?$/);
 76    if (!match) return value;
 77    const [, name, rawArgs] = match;
 78    const filterArg = rawArgs?.replace(/^['"]|['"]$/g, '') ?? '';
 79  
 80    switch (name) {
 81      case 'default': {
 82        if (value === null || value === undefined || value === '') {
 83          const intVal = parseInt(filterArg, 10);
 84          if (!Number.isNaN(intVal) && String(intVal) === filterArg.trim()) return intVal;
 85          return filterArg;
 86        }
 87        return value;
 88      }
 89      case 'join':
 90        return Array.isArray(value) ? value.join(filterArg || ', ') : value;
 91      case 'upper':
 92        return typeof value === 'string' ? value.toUpperCase() : value;
 93      case 'lower':
 94        return typeof value === 'string' ? value.toLowerCase() : value;
 95      case 'trim':
 96        return typeof value === 'string' ? value.trim() : value;
 97      case 'truncate': {
 98        const n = parseInt(filterArg, 10) || 50;
 99        return typeof value === 'string' && value.length > n ? `${value.slice(0, n)}...` : value;
100      }
101      case 'replace': {
102        if (typeof value !== 'string') return value;
103        const parts = rawArgs?.split(',').map(s => s.trim().replace(/^['"]|['"]$/g, '')) ?? [];
104        return parts.length >= 2 ? value.replaceAll(parts[0], parts[1]) : value;
105      }
106      case 'keys':
107        return value && typeof value === 'object' ? Object.keys(value) : value;
108      case 'length':
109        return Array.isArray(value) ? value.length : typeof value === 'string' ? value.length : value;
110      case 'first':
111        return Array.isArray(value) ? value[0] : value;
112      case 'last':
113        return Array.isArray(value) ? value[value.length - 1] : value;
114      case 'json':
115        return JSON.stringify(value ?? null);
116      case 'slugify':
117        // Convert to URL-safe slug
118        return typeof value === 'string'
119          ? value
120              .toLowerCase()
121              .replace(/[^\p{L}\p{N}]+/gu, '-')
122              .replace(/^-|-$/g, '')
123          : value;
124      case 'sanitize':
125        // Remove invalid filename characters
126        return typeof value === 'string'
127          // biome-ignore lint/suspicious/noControlCharactersInRegex: intentional - strips C0 control chars from filenames
128          ? value.replace(/[<>:"/\\|?*\x00-\x1f]/g, '_')
129          : value;
130      case 'ext': {
131        // Extract file extension from URL or path
132        if (typeof value !== 'string') return value;
133        const lastDot = value.lastIndexOf('.');
134        const lastSlash = Math.max(value.lastIndexOf('/'), value.lastIndexOf('\\'));
135        return lastDot > lastSlash ? value.slice(lastDot) : '';
136      }
137      case 'basename': {
138        // Extract filename from URL or path
139        if (typeof value !== 'string') return value;
140        const parts = value.split(/[/\\]/);
141        return parts[parts.length - 1] || value;
142      }
143      case 'urlencode':
144        return typeof value === 'string' ? encodeURIComponent(value) : value;
145      case 'urldecode':
146        return typeof value === 'string' ? decodeURIComponent(value) : value;
147      default:
148        return value;
149    }
150  }
151  
/**
 * Resolve a dotted path such as `item.foo.bar`, `args.limit` or `index`.
 *
 * The first segment selects the starting object (`args`, `item`, `data`,
 * `root`); `index` short-circuits to the current index. Any other first
 * segment is treated as a property of `item`, so `title` behaves like
 * `item.title`. Numeric segments index into arrays.
 *
 * @returns The value at the path, or `null` when a segment cannot be
 *   followed (the current value is neither a record nor an array being
 *   indexed numerically).
 */
export function resolvePath(pathStr: string, ctx: RenderContext): unknown {
  const segments = pathStr.split('.');
  const head = segments[0];
  const fallbackItem = ctx.item ?? {};

  let current: unknown;
  let tail = segments.slice(1);
  switch (head) {
    case 'index':
      return ctx.index ?? 0;
    case 'args':
      current = ctx.args ?? {};
      break;
    case 'item':
      current = fallbackItem;
      break;
    case 'data':
      current = ctx.data;
      break;
    case 'root':
      current = ctx.root;
      break;
    default:
      // No recognised root: the whole path is relative to `item`.
      current = fallbackItem;
      tail = segments;
  }

  for (const key of tail) {
    if (isRecord(current)) {
      current = current[key];
      continue;
    }
    if (Array.isArray(current) && /^\d+$/.test(key)) {
      current = current[parseInt(key, 10)];
      continue;
    }
    return null;
  }
  return current;
}
175  
/**
 * Deny-list for the last-resort JS fallback (`evalJsExpr`), which evaluates
 * arbitrary JS expressions inside a `node:vm` sandbox with dynamic code
 * generation disabled. Compiled scripts are cached by expression string so
 * loops that evaluate the same expression hundreds of times do not recompile.
 *
 * An expression containing any of these identifiers as a whole word is
 * rejected before it is compiled and evaluates to `undefined`.
 *
 * NOTE(review): this is a textual match on the expression source, so it does
 * not see property names assembled at runtime (e.g. `['con' + 'structor']`);
 * treat it as defense-in-depth rather than the sole barrier.
 */
const FORBIDDEN_EXPR_PATTERNS = /\b(constructor|__proto__|prototype|globalThis|process|require|import|eval)\b/;
185  
/**
 * Serialized-JSON cache keyed by source object reference.
 *
 * Only the JSON *string* is cached, never the parsed object: a parsed copy
 * could be mutated by sandboxed code and the polluted version would leak
 * into later evaluations. Re-parsing the cached string hands every
 * evaluation its own clean deep copy while skipping the repeated
 * JSON.stringify() for the same source object across loop iterations.
 * Being a WeakMap, entries disappear once the source object is unreachable.
 */
const _sanitizeCache = new WeakMap<object, string>();

/**
 * Deep-copy plain data through a JSON round-trip so sandboxed expressions
 * receive a private copy instead of the caller's live object.
 *
 * - Primitives, `null` and `undefined` are returned unchanged.
 * - Objects are returned as a fresh `JSON.parse` result on every call.
 * - Anything that cannot be round-tripped (circular structures, functions)
 *   yields `{}`.
 *
 * The snapshot is taken the first time a given object is seen; later
 * in-place mutations of that same object are not reflected.
 */
function sanitizeContext(obj: unknown): unknown {
  const kind = typeof obj;
  const isReference = obj != null && (kind === 'object' || kind === 'function');
  if (!isReference) return obj;

  const source = obj as object;
  let json = _sanitizeCache.get(source);
  if (json === undefined) {
    try {
      json = JSON.stringify(obj);
      _sanitizeCache.set(source, json);
      return JSON.parse(json);
    } catch {
      return {};
    }
  }
  return JSON.parse(json);
}
219  
/** LRU-bounded cache for compiled VM scripts — prevents unbounded memory growth. */
const MAX_VM_CACHE_SIZE = 256;
const _vmCache = new Map<string, vm.Script>();

/**
 * Return the compiled script for `expr`, compiling and caching it on a miss.
 *
 * The expression is wrapped in parentheses so it is always parsed as an
 * expression. The cache holds at most MAX_VM_CACHE_SIZE entries and evicts
 * the least recently used one; Map iteration order is insertion order, so a
 * hit re-inserts the entry to mark it as most recent.
 *
 * @throws SyntaxError when `expr` is not a valid JS expression. The cache is
 *   left untouched in that case.
 */
function getOrCompileScript(expr: string): vm.Script {
  const cached = _vmCache.get(expr);
  if (cached) {
    // Refresh recency so hot expressions survive eviction.
    _vmCache.delete(expr);
    _vmCache.set(expr, cached);
    return cached;
  }

  // Compile before evicting: a syntax error must not cost a cached entry.
  const script = new vm.Script(`(${expr})`);

  if (_vmCache.size >= MAX_VM_CACHE_SIZE) {
    const oldestKey = _vmCache.keys().next().value;
    if (oldestKey !== undefined) _vmCache.delete(oldestKey);
  }

  _vmCache.set(expr, script);
  return script;
}
238  
/**
 * Shared VM sandbox and its contextified handle.
 *
 * Creating a V8 context via vm.createContext() is comparatively costly
 * (roughly 0.3ms each), which adds up when a pipeline maps or filters over
 * hundreds of items. The context is therefore built once, lazily, and the
 * caller overwrites the sandbox's data properties before each evaluation.
 *
 * Reuse relies on the caller upholding three things:
 *   1. Data assigned to the sandbox is sanitized (deep-copied) first
 *   2. Scripts run with a 50ms timeout
 *   3. codeGeneration is disabled (no eval/Function inside the sandbox)
 */
let _reusableSandbox: Record<string, unknown> | null = null;
let _reusableContext: vm.Context | null = null;

/**
 * Return the process-wide sandbox object together with its VM context,
 * creating both on first use. Every call returns the same two objects.
 */
function getReusableContext(): { sandbox: Record<string, unknown>; context: vm.Context } {
  if (_reusableSandbox === null || _reusableContext === null) {
    const freshSandbox: Record<string, unknown> = {
      // Data slots — replaced before every evaluation.
      args: {},
      item: {},
      data: null,
      root: null,
      index: 0,
      // Helpers exposed to expressions.
      encodeURIComponent,
      decodeURIComponent,
      JSON,
      Math,
      Number,
      String,
      Boolean,
      Array,
      Date,
    };
    _reusableSandbox = freshSandbox;
    _reusableContext = vm.createContext(freshSandbox, {
      codeGeneration: { strings: false, wasm: false },
    });
  }
  return { sandbox: _reusableSandbox, context: _reusableContext };
}
280  
/** Host helpers exposed to sandboxed expressions; re-applied before every run. */
const SANDBOX_BUILTINS: Readonly<Record<string, unknown>> = {
  encodeURIComponent,
  decodeURIComponent,
  JSON,
  Math,
  Number,
  String,
  Boolean,
  Array,
  Date,
};

/** Properties that are part of the sandbox's initial shape and safe to keep. */
const SANDBOX_WHITELIST = new Set<string>([
  'args', 'item', 'data', 'root', 'index',
  ...Object.keys(SANDBOX_BUILTINS),
]);

/**
 * Evaluate an arbitrary JS expression in the shared `node:vm` sandbox.
 *
 * This is the last-resort path for expressions that are neither literals nor
 * plain dotted paths (`||`, `??`, arithmetic, ternaries, method calls, …).
 *
 * @param expr Expression source.
 * @param ctx Values exposed as `args`, `item`, `data`, `root` and `index`.
 * @returns The expression's value, or `undefined` when the expression is
 *   longer than 2000 characters, matches the deny-list, fails to compile,
 *   throws, or exceeds the 50ms timeout.
 */
function evalJsExpr(expr: string, ctx: RenderContext): unknown {
  // Guard against absurdly long expressions that could indicate injection.
  if (expr.length > 2000) return undefined;

  // Block obvious sandbox escape attempts.
  if (FORBIDDEN_EXPR_PATTERNS.test(expr)) return undefined;

  try {
    const script = getOrCompileScript(expr);
    const { sandbox, context } = getReusableContext();

    // Clean non-whitelisted properties that a previous script may have added.
    // Without this, `${{ x = 42 }}` would leak `x` into subsequent evaluations.
    for (const key of Object.keys(sandbox)) {
      if (!SANDBOX_WHITELIST.has(key)) {
        delete sandbox[key];
      }
    }

    // Restore the helper bindings as well: they are whitelisted, so a script
    // such as `${{ Math = 0 }}` would otherwise replace them for every later
    // evaluation in this process.
    Object.assign(sandbox, SANDBOX_BUILTINS);

    // Update mutable sandbox properties — sanitizeContext hands the script
    // private deep copies instead of the caller's live objects.
    sandbox.args = sanitizeContext(ctx.args ?? {});
    sandbox.item = sanitizeContext(ctx.item ?? {});
    sandbox.data = sanitizeContext(ctx.data);
    sandbox.root = sanitizeContext(ctx.root);
    sandbox.index = ctx.index ?? 0;
    return script.runInContext(context, { timeout: 50 });
  } catch {
    // Deliberate best-effort: a bad expression renders as undefined.
    return undefined;
  }
}
318  
/**
 * Normalize JavaScript source for browser evaluate() calls.
 *
 * The result is always the source text of a function:
 *   - empty input                → `() => undefined`
 *   - an IIFE `( … )()`          → wrapped so the call happens inside the function
 *   - an arrow function          → returned as-is
 *   - a function expression or
 *     declaration                → returned as-is
 *   - anything else (expression) → wrapped as `() => (expr)`
 *
 * @param source Raw JS source supplied by the pipeline author.
 * @returns Function source text.
 */
export function normalizeEvaluateSource(source: string): string {
  const stripped = source.trim();
  if (!stripped) return '() => undefined';
  if (stripped.startsWith('(') && stripped.endsWith(')()')) return `() => (${stripped})`;
  if (/^(async\s+)?\([^)]*\)\s*=>/.test(stripped)) return stripped;
  if (/^(async\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=>/.test(stripped)) return stripped;
  if (stripped.startsWith('function ') || stripped.startsWith('async function ')) return stripped;
  // Anonymous/generator forms without a space after the keyword
  // (`function(){…}`, `function*(){…}`, `async function(){…}`). Requiring a
  // trailing `}` keeps immediately-invoked forms like `function(){…}()` on
  // the expression path below, where wrapping evaluates the call.
  if (/^(async\s+)?function(?![\w$])/.test(stripped) && stripped.endsWith('}')) return stripped;
  return `() => (${stripped})`;
}