Cradicle Explorer

/ src / agents / utils / task-manager.js
task-manager.js
  1  /**
  2   * Task Manager Utility
  3   *
  4   * CRUD operations for agent_tasks table.
  5   * Provides interface for creating, updating, querying, and completing tasks.
  6   */
  7  
  8  import { run, getOne, getAll } from '../../utils/db.js';
  9  import { spawn } from 'child_process';
 10  import { loadavg, cpus } from 'os';
 11  import Logger from '../../utils/logger.js';
 12  import { distance as levenshtein } from 'fastest-levenshtein';
 13  import { simpleLLMCall } from './agent-claude-api.js';
 14  
 15  const logger = new Logger('TaskManager');
 16  
 17  /**
 18   * Find duplicate task to prevent creating duplicates
 19   *
 20   * @param {Object} taskData - Task data to check
 21   * @param {string} taskData.task_type - Task type
 22   * @param {string} taskData.assigned_to - Assigned agent
 23   * @param {Object} taskData.context - Task context
 24   * @returns {Promise<Object|null>} - Existing task or null
 25   */
 26  async function findDuplicateTask({ task_type, assigned_to, context }) {
 27    if (!context) {
 28      return null;
 29    }
 30  
 31    // Don't deduplicate retry tasks - they intentionally re-process the same error
 32    if (
 33      context.previous_attempt_failed ||
 34      (context.retry_count !== null && context.retry_count > 0)
 35    ) {
 36      return null;
 37    }
 38  
 39    // Extract deduplication key based on task type
 40    let dedupeKey = null;
 41    let dedupeField = null;
 42  
 43    switch (task_type) {
 44      case 'fix_bug':
 45      case 'classify_error':
 46        // Dedupe by error_message (first 100 chars for similarity)
 47        dedupeKey = context.error_message?.substring(0, 100);
 48        dedupeField = 'error_message';
 49        break;
 50  
 51      case 'design_optimization':
 52        // Dedupe by optimization description or pattern
 53        dedupeKey = context.description?.substring(0, 100) || context.pattern;
 54        dedupeField = 'description';
 55        break;
 56  
 57      case 'scan_logs':
 58      case 'check_agent_health':
 59      case 'check_pipeline_health':
 60      case 'check_process_compliance':
 61      case 'detect_anomaly':
 62      case 'check_slo_compliance':
 63        // Monitoring tasks: only one active at a time per type
 64        dedupeKey = task_type;
 65        dedupeField = null;
 66        break;
 67  
 68      default:
 69        // For other task types, allow duplicates (features, refactors, etc.)
 70        return null;
 71    }
 72  
 73    if (!dedupeKey) {
 74      return null;
 75    }
 76  
 77    try {
 78      // Check for existing task with same key and active status
 79      let exactMatch;
 80  
 81      if (dedupeField) {
 82        // Search by JSONB field (check active tasks OR recently completed within 5 min)
 83        exactMatch = await getOne(
 84          `SELECT id, status FROM tel.agent_tasks
 85           WHERE task_type = $1
 86             AND assigned_to = $2
 87             AND (
 88               status IN ('pending', 'running', 'blocked')
 89               OR (status = 'completed' AND created_at > NOW() - INTERVAL '5 minutes')
 90             )
 91             AND LEFT(context_json->>'${dedupeField}', 100) = $3
 92           ORDER BY created_at DESC
 93           LIMIT 1`,
 94          [task_type, assigned_to, dedupeKey]
 95        );
 96      } else {
 97        // Search by task_type only (monitoring tasks)
 98        exactMatch = await getOne(
 99          `SELECT id, status FROM tel.agent_tasks
100           WHERE task_type = $1
101             AND assigned_to = $2
102             AND (
103               status IN ('pending', 'running', 'blocked')
104               OR (status = 'completed' AND created_at > NOW() - INTERVAL '5 minutes')
105             )
106           ORDER BY created_at DESC
107           LIMIT 1`,
108          [task_type, assigned_to]
109        );
110      }
111  
112      if (exactMatch) {
113        return exactMatch;
114      }
115  
116      // Fuzzy matching fallback for errors (70% threshold + Haiku for borderline)
117      if (dedupeField === 'error_message' && dedupeKey) {
118        return await findFuzzyDuplicate(task_type, assigned_to, dedupeKey);
119      }
120  
121      return null;
122    } catch (error) {
123      // If deduplication check fails, log and allow task creation
124      logger.error('Deduplication check failed:', error.message);
125      return null;
126    }
127  }
128  
/**
 * Fuzzy duplicate lookup for error tasks.
 *
 * Scores up to 20 recent candidate tasks against `errorMessage` with
 * calculateSimilarity(). A score of 0.7 or more is a match (highest score
 * wins). If nothing reaches 0.7, the single best candidate in the 0.4-0.7
 * band is sent to a Haiku model, which is asked for a YES/NO verdict on
 * whether both messages describe the same issue.
 *
 * Candidates are active tasks (pending/running/blocked) plus completed tasks
 * created within the last 30 minutes.
 *
 * @param {string} task_type - Task type to search within
 * @param {string} assigned_to - Agent the tasks are assigned to
 * @param {string} errorMessage - Error message of the task being created
 * @returns {Promise<Object|null>} - `{ id, status, similarity }` or null.
 *   LLM-confirmed matches report a fixed similarity of 0.85. Any failure is
 *   logged and reported as null so task creation is never blocked.
 */
async function findFuzzyDuplicate(task_type, assigned_to, errorMessage) {
  try {
    const rows = await getAll(
      `SELECT id, status, context_json->>'error_message' as error_msg
       FROM tel.agent_tasks WHERE task_type = $1 AND assigned_to = $2
       AND (status IN ('pending','running','blocked') OR (status='completed' AND created_at > NOW() - INTERVAL '30 minutes'))
       ORDER BY created_at DESC LIMIT 20`,
      [task_type, assigned_to]
    );

    let strongest = null;
    let strongestScore = 0;
    // Best candidate in the 0.4-0.7 band; the earliest row wins a tie.
    let topBorderline = null;

    for (const row of rows) {
      if (!row.error_msg) {
        continue;
      }

      const score = calculateSimilarity(errorMessage, row.error_msg);
      const isStrong = score >= 0.7;

      if (isStrong && score > strongestScore) {
        strongestScore = score;
        strongest = { id: row.id, status: row.status, similarity: score };
        continue;
      }

      const isBorderline = score >= 0.4 && score < 0.7;
      if (isBorderline && (topBorderline === null || score > topBorderline.similarity)) {
        topBorderline = { ...row, similarity: score };
      }
    }

    if (strongest) {
      return strongest;
    }

    if (topBorderline === null) {
      return null;
    }

    // Only the top borderline candidate is sent to the LLM (cheapest option).
    try {
      const prompt = [
        'Are these two error messages describing the same underlying issue?',
        '',
        `Error 1: ${errorMessage.substring(0, 200)}`,
        `Error 2: ${topBorderline.error_msg.substring(0, 200)}`,
        '',
        'Reply with exactly one word: YES or NO',
      ].join('\n');

      const verdict = await simpleLLMCall('task-manager', null, {
        prompt,
        systemPrompt: 'You are a triage assistant. Reply only YES or NO.',
        maxTokens: 5,
        model: process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5',
      });

      const saysYes = verdict.trim().toUpperCase().startsWith('YES');
      if (saysYes) {
        logger.info(
          `Haiku dedup: matched "${errorMessage.substring(0, 60)}" to task #${topBorderline.id} (sim=${topBorderline.similarity.toFixed(2)})`
        );
        return { id: topBorderline.id, status: topBorderline.status, similarity: 0.85 };
      }
    } catch (haikuErr) {
      // A failed LLM call must not block task creation; log and fall through.
      logger.warn('Haiku dedup fallback failed:', haikuErr.message);
    }

    return null;
  } catch (e) {
    logger.error('Fuzzy matching failed:', e.message);
    return null;
  }
}
200  
/**
 * Score how similar two strings are, from 0 (unrelated) to 1 (identical).
 *
 * The score is the average of two measures computed on the lower-cased,
 * trimmed inputs:
 *  - Jaccard index of the sets of whitespace-separated words
 *  - normalized Levenshtein similarity: 1 - distance / longer length
 *
 * @param {string} s1 - First string
 * @param {string} s2 - Second string
 * @returns {number} - 1 when both are empty/missing or equal after
 *   normalization, 0 when exactly one is empty/missing, otherwise the
 *   averaged score
 */
function calculateSimilarity(s1, s2) {
  if (!s1 && !s2) return 1;
  if (!s1 || !s2) return 0;
  if (s1 === s2) return 1;

  const n1 = s1.toLowerCase().trim();
  const n2 = s2.toLowerCase().trim();

  // Equal once case and surrounding whitespace are ignored. This also covers
  // two whitespace-only strings, which would otherwise score 0.5 (empty word
  // sets give Jaccard 0 while Levenshtein gives 1).
  if (n1 === n2) return 1;

  // Jaccard over word sets. At least one input is non-empty here, so the
  // union is never empty.
  const words1 = new Set(n1.split(/\s+/).filter(w => w.length > 0));
  const words2 = new Set(n2.split(/\s+/).filter(w => w.length > 0));
  const shared = [...words1].filter(w => words2.has(w)).length;
  const unionSize = new Set([...words1, ...words2]).size;
  const jac = unionSize === 0 ? 0 : shared / unionSize;

  // Levenshtein distance normalized by the longer string
  const dist = levenshtein(n1, n2);
  const maxLen = Math.max(n1.length, n2.length);
  const lev = maxLen === 0 ? 1 : 1 - dist / maxLen;

  return (jac + lev) / 2;
}
226  
/**
 * Create a new agent task
 *
 * Validates the input, skips creation when findDuplicateTask() reports an
 * equivalent task (returning that task's id instead), inserts the row, and -
 * unless AGENT_REALTIME_NOTIFICATIONS is 'false' - asks spawnAgentAsync() to
 * start the assigned agent right away.
 *
 * @param {Object} taskData - Task data
 * @param {string} taskData.task_type - Type of task (e.g., 'classify_error', 'fix_bug')
 * @param {string} taskData.assigned_to - Agent to assign to ('developer', 'qa', 'security', etc.)
 * @param {string} [taskData.created_by] - Agent creating the task
 * @param {number} [taskData.priority=5] - Priority (1-10)
 * @param {Object} [taskData.context] - Context object (will be JSON-stringified)
 * @param {number} [taskData.parent_task_id] - Parent task ID for workflow chains
 * @returns {Promise<number>} - ID of the created task, or of the existing duplicate
 * @throws {Error} When task_type/assigned_to are missing, assigned_to is not a
 *   known agent, or priority is not a number between 1 and 10
 *
 * @example
 * const taskId = await createAgentTask({
 *   task_type: 'fix_bug',
 *   assigned_to: 'developer',
 *   created_by: 'triage',
 *   priority: 7,
 *   context: { error_type: 'null_pointer', file: 'src/scoring.js' }
 * });
 */
export async function createAgentTask(taskData) {
  const {
    task_type,
    assigned_to,
    created_by = null,
    priority = 5,
    context = null,
    parent_task_id = null,
  } = taskData;

  // Validate required fields
  if (!task_type || !assigned_to) {
    throw new Error('task_type and assigned_to are required');
  }

  // Validate assigned_to
  const validAgents = ['developer', 'qa', 'security', 'architect', 'triage', 'monitor'];
  if (!validAgents.includes(assigned_to)) {
    throw new Error(
      `Invalid assigned_to: ${assigned_to}. Must be one of: ${validAgents.join(', ')}`
    );
  }

  // Validate priority. A plain range comparison lets NaN and non-numeric
  // strings through (both comparisons are false), so check finiteness on the
  // numeric value first. Numeric strings such as '7' remain accepted.
  const numericPriority = Number(priority);
  if (!Number.isFinite(numericPriority) || numericPriority < 1 || numericPriority > 10) {
    throw new Error('priority must be between 1 and 10');
  }

  // DEDUPLICATION: Check for existing similar task
  const existingTask = await findDuplicateTask({ task_type, assigned_to, context });
  if (existingTask) {
    logger.info(
      `Task already exists (skipping duplicate): #${existingTask.id} ${task_type} (${existingTask.status})`
    );
    return existingTask.id;
  }

  const result = await run(
    `INSERT INTO tel.agent_tasks (task_type, assigned_to, created_by, priority, context_json, parent_task_id)
     VALUES ($1, $2, $3, $4, $5, $6)
     RETURNING id`,
    [
      task_type,
      assigned_to,
      created_by,
      priority,
      context ? JSON.stringify(context) : null,
      parent_task_id,
    ]
  );

  // NOTE(review): relies on run() exposing the new row id as lastInsertRowid -
  // confirm against utils/db.js.
  const taskId = result.lastInsertRowid;

  // Spawn agent immediately if real-time notifications enabled
  if (process.env.AGENT_REALTIME_NOTIFICATIONS !== 'false') {
    spawnAgentAsync(assigned_to, taskId);
  }

  return taskId;
}
308  
/**
 * Get tasks assigned to a specific agent
 *
 * Rows are ordered by priority (highest first), then by creation time (oldest
 * first). `context_json` and `result_json` are returned parsed. A column that
 * is empty or holds malformed JSON is returned as null, so one bad row cannot
 * make the whole listing throw.
 *
 * @param {string} agentName - Agent name
 * @param {string} [status='pending'] - Task status filter
 * @param {number} [limit=5] - Maximum number of tasks to return
 * @returns {Promise<Array>} - Array of task objects
 *
 * @example
 * const tasks = await getAgentTasks('developer', 'pending', 5);
 * for (const task of tasks) {
 *   await agent.processTask(task);
 * }
 */
export async function getAgentTasks(agentName, status = 'pending', limit = 5) {
  const tasks = await getAll(
    `SELECT * FROM tel.agent_tasks
     WHERE assigned_to = $1 AND status = $2
     ORDER BY priority DESC, created_at ASC
     LIMIT $3`,
    [agentName, status, limit]
  );

  // Tolerant parse: empty or malformed JSON becomes null instead of throwing.
  const parseJsonColumn = raw => {
    if (!raw) return null;
    try {
      return JSON.parse(raw);
    } catch (_e) {
      return null;
    }
  };

  return tasks.map(task => ({
    ...task,
    context_json: parseJsonColumn(task.context_json),
    result_json: parseJsonColumn(task.result_json),
  }));
}
339  
/**
 * Update task status
 *
 * Sets `status`, stamps `started_at` (for 'running') or `completed_at` (for
 * 'completed'/'failed') with NOW() unless the caller supplies that field, and
 * applies any extra column updates. The keys `result` and `context` are
 * JSON-stringified and written to `result_json` / `context_json`.
 *
 * @param {number} taskId - Task ID
 * @param {string} status - New status ('pending', 'running', 'completed', 'failed',
 *   'blocked', 'awaiting_po_approval', 'awaiting_architect_approval')
 * @param {Object} [updates={}] - Additional fields to update, keyed by column name
 * @returns {Promise<void>}
 * @throws {Error} When status is not a valid status, or an update key is not a
 *   plain SQL identifier
 *
 * @example
 * await updateTaskStatus(123, 'running');
 * await updateTaskStatus(123, 'failed', { error_message: 'API timeout' });
 */
export async function updateTaskStatus(taskId, status, updates = {}) {
  // Validate status
  const validStatuses = [
    'pending',
    'running',
    'completed',
    'failed',
    'blocked',
    'awaiting_po_approval',
    'awaiting_architect_approval',
  ];
  if (!validStatuses.includes(status)) {
    throw new Error(`Invalid status: ${status}. Must be one of: ${validStatuses.join(', ')}`);
  }

  // Treat an explicit null like "no extra updates".
  const extraUpdates = updates ?? {};

  const fields = ['status = $1'];
  const values = [status];

  // Add timestamp based on status
  if (status === 'running' && !extraUpdates.started_at) {
    fields.push('started_at = NOW()');
  } else if ((status === 'completed' || status === 'failed') && !extraUpdates.completed_at) {
    fields.push('completed_at = NOW()');
  }

  // Add other updates. Keys become column names inside the SQL text (they
  // cannot be bound as parameters), so only plain identifiers are accepted.
  const identifierPattern = /^[A-Za-z_][A-Za-z0-9_]*$/;
  for (const [key, value] of Object.entries(extraUpdates)) {
    if (!identifierPattern.test(key)) {
      throw new Error(`Invalid update field: ${key}`);
    }

    const isJsonField = key === 'result' || key === 'context';
    values.push(isJsonField ? JSON.stringify(value) : value);
    // The placeholder index equals the value's 1-based position in `values`.
    fields.push(`${isJsonField ? `${key}_json` : key} = $${values.length}`);
  }

  values.push(taskId);

  await run(
    `UPDATE tel.agent_tasks
     SET ${fields.join(', ')}
     WHERE id = $${values.length}`,
    values
  );
}
400  
/**
 * Move a task into the 'running' status.
 *
 * Thin wrapper around updateTaskStatus().
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<void>}
 *
 * @example
 * await startTask(123);
 */
export async function startTask(taskId) {
  const nextStatus = 'running';
  await updateTaskStatus(taskId, nextStatus);
}
413  
/**
 * Move a task into the 'completed' status, optionally storing a result.
 *
 * @param {number} taskId - Task ID
 * @param {Object} [result] - Result object; when truthy it is passed on as the
 *   `result` update (JSON-stringified by updateTaskStatus)
 * @returns {Promise<void>}
 *
 * @example
 * await completeTask(123, { files_changed: ['src/scoring.js'], fix_commit: 'abc123' });
 */
export async function completeTask(taskId, result = null) {
  const updates = {};
  if (result) {
    updates.result = result;
  }
  await updateTaskStatus(taskId, 'completed', updates);
}
427  
/**
 * Move a task into the 'failed' status and record why it failed.
 *
 * @param {number} taskId - Task ID
 * @param {string} errorMessage - Error message stored in `error_message`
 * @param {number} [retryCount] - Current retry count; `retry_count` is only
 *   written when a non-null value is given
 * @returns {Promise<void>}
 *
 * @example
 * await failTask(123, 'API timeout after 3 retries', 3);
 */
export async function failTask(taskId, errorMessage, retryCount = null) {
  const updates =
    retryCount === null
      ? { error_message: errorMessage }
      : { error_message: errorMessage, retry_count: retryCount };

  await updateTaskStatus(taskId, 'failed', updates);
}
446  
/**
 * Move a task into the 'blocked' status.
 *
 * The reason is stored in the task's `error_message` field.
 *
 * @param {number} taskId - Task ID
 * @param {string} reason - Reason for blocking (e.g., 'Coverage below 80%')
 * @returns {Promise<void>}
 *
 * @example
 * await blockTask(123, 'Coverage below 80% for src/scoring.js (current: 65%)');
 */
export async function blockTask(taskId, reason) {
  const updates = { error_message: reason };
  await updateTaskStatus(taskId, 'blocked', updates);
}
460  
/**
 * Fetch a single task by its ID.
 *
 * `context_json` and `result_json` are returned parsed. A column that is
 * empty or contains malformed JSON comes back as null rather than throwing.
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<Object|null>} - Task object or null if not found
 *
 * @example
 * const task = await getTaskById(123);
 * if (task) {
 *   console.log(`Task ${task.id}: ${task.task_type} - ${task.status}`);
 * }
 */
export async function getTaskById(taskId) {
  const row = await getOne('SELECT * FROM tel.agent_tasks WHERE id = $1', [taskId]);

  if (!row) {
    return null;
  }

  // Parse one JSON column; malformed JSON yields null.
  const parseColumn = columnName => {
    try {
      return row[columnName] ? JSON.parse(row[columnName]) : null;
    } catch (_e) {
      return null;
    }
  };

  const parsedContext = parseColumn('context_json');
  const parsedResult = parseColumn('result_json');

  return {
    ...row,
    context_json: parsedContext,
    result_json: parsedResult,
  };
}
496  
/**
 * Get child tasks of a parent task
 *
 * Children are returned oldest first. `context_json` and `result_json` are
 * returned parsed. A column that is empty or holds malformed JSON is returned
 * as null, so one bad row cannot make the whole listing throw.
 *
 * @param {number} parentTaskId - Parent task ID
 * @returns {Promise<Array>} - Array of child task objects
 *
 * @example
 * const children = await getChildTasks(123);
 * console.log(`Task 123 has ${children.length} child tasks`);
 */
export async function getChildTasks(parentTaskId) {
  const tasks = await getAll(
    `SELECT * FROM tel.agent_tasks
     WHERE parent_task_id = $1
     ORDER BY created_at ASC`,
    [parentTaskId]
  );

  // Tolerant parse: empty or malformed JSON becomes null instead of throwing.
  const parseJsonColumn = raw => {
    if (!raw) return null;
    try {
      return JSON.parse(raw);
    } catch (_e) {
      return null;
    }
  };

  return tasks.map(task => ({
    ...task,
    context_json: parseJsonColumn(task.context_json),
    result_json: parseJsonColumn(task.result_json),
  }));
}
521  
/**
 * Increment retry count for a task
 *
 * The increment happens in a single UPDATE ... RETURNING statement. A NULL
 * `retry_count` is treated as 0, because `NULL + 1` is NULL in SQL and the
 * counter would otherwise never advance.
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<number>} - New retry count, or 0 when no row was returned
 *   for the given ID
 *
 * @example
 * const retryCount = await incrementRetryCount(123);
 * if (retryCount > 3) {
 *   await failTask(123, 'Max retries exceeded');
 * }
 */
export async function incrementRetryCount(taskId) {
  const result = await getOne(
    `UPDATE tel.agent_tasks
     SET retry_count = COALESCE(retry_count, 0) + 1
     WHERE id = $1
     RETURNING retry_count`,
    [taskId]
  );
  return result ? result.retry_count : 0;
}
544  
/**
 * Report whether a task exists, optionally requiring a specific status.
 *
 * @param {number} taskId - Task ID
 * @param {string} [expectedStatus] - When given, the task's status must equal it
 * @returns {Promise<boolean>} - True if the task exists (and matches the
 *   status if one was provided)
 *
 * @example
 * if (await taskExists(123, 'pending')) {
 *   await startTask(123);
 * }
 */
export async function taskExists(taskId, expectedStatus = null) {
  const task = await getTaskById(taskId);
  const statusMatches = () => !expectedStatus || task.status === expectedStatus;
  return Boolean(task) && statusMatches();
}
563  
/**
 * Get agent task statistics
 *
 * Counts the agent's tasks created within the look-back window, broken down by
 * status, and derives success/failure rates as fractions of the total.
 *
 * @param {string} agentName - Agent name
 * @param {number} [hours=24] - Hours to look back
 * @returns {Promise<Object>} - Statistics object with integer `total`,
 *   `completed`, `failed`, `blocked`, `running`, `pending`, plus
 *   `success_rate` and `failure_rate` (both 0 when there are no tasks)
 *
 * @example
 * const stats = await getAgentStats('developer', 24);
 * console.log(`Success rate: ${stats.success_rate * 100}%`);
 */
export async function getAgentStats(agentName, hours = 24) {
  const stats = await getOne(
    `SELECT
       COUNT(*) as total,
       SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
       SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
       SUM(CASE WHEN status = 'blocked' THEN 1 ELSE 0 END) as blocked,
       SUM(CASE WHEN status = 'running' THEN 1 ELSE 0 END) as running,
       SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending
     FROM tel.agent_tasks
     WHERE assigned_to = $1
       AND created_at > NOW() - ($2 || ' hours')::interval`,
    [agentName, hours]
  );

  // SUM() over zero rows is NULL, which parseInt would turn into NaN. Treat a
  // missing or unparseable count as 0 so callers always receive numbers.
  const row = stats ?? {};
  const toCount = value => Number.parseInt(value ?? 0, 10) || 0;

  const total = toCount(row.total);
  const completed = toCount(row.completed);
  const failed = toCount(row.failed);
  const success_rate = total > 0 ? completed / total : 0;
  const failure_rate = total > 0 ? failed / total : 0;

  return {
    ...row,
    total,
    completed,
    failed,
    blocked: toCount(row.blocked),
    running: toCount(row.running),
    pending: toCount(row.pending),
    success_rate,
    failure_rate,
  };
}
608  
/**
 * Report whether an agent currently counts as running.
 *
 * An agent is running when tel.agent_state has a row for it with status
 * 'working' whose `last_active` falls within the staleness window. The window
 * is AGENT_LOCK_STALE_MINUTES minutes (2 when the variable is unset or empty).
 *
 * @param {string} agentName - Agent name
 * @returns {Promise<boolean>} - True if agent is actively working
 *
 * @example
 * if (!await isAgentRunning('qa')) {
 *   spawnAgentAsync('qa', taskId);
 * }
 */
export async function isAgentRunning(agentName) {
  const { AGENT_LOCK_STALE_MINUTES: configuredWindow } = process.env;
  const staleWindowMinutes = parseInt(configuredWindow || '2', 10);
  const queryParams = [agentName, staleWindowMinutes];

  const activeRow = await getOne(
    `SELECT status, last_active
     FROM tel.agent_state
     WHERE agent_name = $1
       AND status = 'working'
       AND last_active > NOW() - ($2 || ' minutes')::interval`,
    queryParams
  );

  return Boolean(activeRow);
}
633  
/**
 * Spawn agent process immediately to handle new task
 *
 * Fire-and-forget: returns right away. The spawn is skipped when the 1-minute
 * load average per CPU is at or above AGENT_SPAWN_LOAD_THRESHOLD (default
 * 0.7). Otherwise the asynchronous pool and rate-limit checks in
 * _spawnAgentAsyncInner() decide whether a process is started; their errors
 * are logged, never thrown.
 *
 * @param {string} agentName - Agent name to spawn
 * @param {number} taskId - Task ID that triggered the spawn
 * @returns {void}
 *
 * @example
 * spawnAgentAsync('developer', 42); // Fire and forget
 */
export function spawnAgentAsync(agentName, taskId) {
  // 1. Load-based throttle: skip if CPU is busy (let existing agents drain first)
  const DEFAULT_LOAD_THRESHOLD = 0.7;
  const parsedThreshold = parseFloat(
    process.env.AGENT_SPAWN_LOAD_THRESHOLD || String(DEFAULT_LOAD_THRESHOLD)
  );
  // A malformed env value parses to NaN, and `load >= NaN` is always false,
  // which would silently disable the throttle. Fall back to the default.
  const LOAD_THRESHOLD = Number.isFinite(parsedThreshold)
    ? parsedThreshold
    : DEFAULT_LOAD_THRESHOLD;

  // os.cpus() may return an empty array when CPU info is unavailable; clamp to
  // 1 so the division cannot produce Infinity or NaN.
  const cpuCount = Math.max(cpus().length, 1);
  const normalizedLoad = loadavg()[0] / cpuCount;

  if (normalizedLoad >= LOAD_THRESHOLD) {
    logger.info(
      `CPU load ${normalizedLoad.toFixed(2)} >= ${LOAD_THRESHOLD}, skipping spawn of ${agentName} (task ${taskId})`
    );
    return;
  }

  // 2. Per-type pool check and global rate limit are async — fire-and-forget
  _spawnAgentAsyncInner(agentName, taskId, normalizedLoad).catch(err => {
    logger.warn(`Spawn check error for ${agentName}:`, err.message);
  });
}
659  
/**
 * Asynchronous part of spawnAgentAsync(): apply the per-type pool limit and
 * the global spawn rate limit, then start the agent process.
 *
 * Checks, in order:
 *  1. Pool limit - skip when the number of 'working' rows for this agent in
 *     tel.agent_state (active within AGENT_LOCK_STALE_MINUTES, default 2) has
 *     reached AGENT_MAX_<AGENT> (default 3 for developer/qa, 1 otherwise).
 *  2. Rate limit - skip when the last recorded spawn in ops.cron_locks is
 *     younger than AGENT_SPAWN_INTERVAL_S seconds (default 60).
 *
 * If both pass, the spawn time is recorded and `npm run agent:run:single
 * <agentName>` is started under `nice -n 19`. Failures while recording or
 * spawning are logged and swallowed; errors from the two check queries
 * propagate to the caller.
 *
 * @param {string} agentName - Agent name to spawn
 * @param {number} taskId - Task ID that triggered the spawn (used for logging)
 * @param {number} normalizedLoad - Per-CPU load average (used for logging)
 * @returns {Promise<void>}
 */
async function _spawnAgentAsyncInner(agentName, taskId, normalizedLoad) {
  // Parse an integer env value, falling back when it is unset, empty or
  // malformed. A NaN limit would make every `>=` / `<` check below false and
  // silently disable the corresponding safeguard.
  const intFromEnv = (raw, fallback) => {
    const parsed = parseInt(raw ?? '', 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };

  // Per-type pool check: enforce max concurrent instances per agent type
  const staleMinutes = intFromEnv(process.env.AGENT_LOCK_STALE_MINUTES, 2);
  const POOL_DEFAULTS = { developer: 3, qa: 3 };
  const envKey = `AGENT_MAX_${agentName.toUpperCase()}`;
  const typeMax = intFromEnv(process.env[envKey], POOL_DEFAULTS[agentName] ?? 1);

  const countRow = await getOne(
    `SELECT COUNT(*) as count FROM tel.agent_state
     WHERE agent_name = $1
       AND status = 'working'
       AND last_active > NOW() - ($2 || ' minutes')::interval`,
    [agentName, staleMinutes]
  );
  const runningCount = parseInt(countRow?.count || '0', 10);
  if (runningCount >= typeMax) {
    logger.info(
      `Agent ${agentName} at pool limit (${runningCount}/${typeMax}), skipping spawn (task ${taskId})`
    );
    return;
  }

  // Global rate limit: max 1 new agent spawn per interval across all types
  const RATE_LOCK_KEY = 'agent_spawn_ratelimit';
  const RATE_INTERVAL_S = intFromEnv(process.env.AGENT_SPAWN_INTERVAL_S, 60);

  const lastSpawn = await getOne(
    'SELECT updated_at FROM ops.cron_locks WHERE lock_key = $1',
    [RATE_LOCK_KEY]
  );
  if (lastSpawn) {
    const secondsSince = (Date.now() - new Date(lastSpawn.updated_at).getTime()) / 1000;
    if (secondsSince < RATE_INTERVAL_S) {
      logger.info(
        `Spawn rate limit: last spawn was ${Math.round(secondsSince)}s ago, skipping ${agentName} (task ${taskId})`
      );
      return;
    }
  }

  try {
    logger.info(
      `Spawning agent ${agentName} for task ${taskId} (load: ${normalizedLoad.toFixed(2)})`
    );

    // Record spawn timestamp for rate limiting
    await run(
      `INSERT INTO ops.cron_locks (lock_key, description, updated_at)
       VALUES ($1, 'Global agent spawn rate limiter (1/min)', NOW())
       ON CONFLICT (lock_key) DO UPDATE SET
         description = EXCLUDED.description,
         updated_at = EXCLUDED.updated_at`,
      [RATE_LOCK_KEY]
    );

    // Spawn agent at low priority. NOT detached — parent waits for child so Node.js
    // can reap it on exit (prevents zombie accumulation when codium is PID 1).
    const child = spawn('nice', ['-n', '19', 'npm', 'run', 'agent:run:single', agentName], {
      stdio: 'ignore',
      cwd: process.cwd(),
      env: process.env,
    });

    child.on('exit', (code, signal) => {
      logger.info(`Agent ${agentName} (task ${taskId}) exited: code=${code} signal=${signal}`);
    });

    child.on('error', err => {
      logger.warn(`Agent ${agentName} spawn error: ${err.message}`);
    });
  } catch (error) {
    logger.error(`Failed to spawn agent ${agentName}:`, error);
    // Don't throw - fallback to scheduled polling
  }
}