task-manager.js
/**
 * Task Manager Utility
 *
 * CRUD operations for the tel.agent_tasks table.
 * Provides the interface for creating, updating, querying, and completing
 * tasks, plus best-effort spawning of agent processes for new tasks.
 */

import { run, getOne, getAll } from '../../utils/db.js';
import { spawn } from 'child_process';
import { loadavg, cpus } from 'os';
import Logger from '../../utils/logger.js';
import { distance as levenshtein } from 'fastest-levenshtein';
import { simpleLLMCall } from './agent-claude-api.js';

const logger = new Logger('TaskManager');

/** Agents a task may be assigned to. */
const VALID_AGENTS = ['developer', 'qa', 'security', 'architect', 'triage', 'monitor'];

/** Statuses accepted by updateTaskStatus. */
const VALID_STATUSES = [
  'pending',
  'running',
  'completed',
  'failed',
  'blocked',
  'awaiting_po_approval',
  'awaiting_architect_approval',
];

/** Monitoring task types: only one active task per type and agent at a time. */
const MONITORING_TASK_TYPES = new Set([
  'scan_logs',
  'check_agent_health',
  'check_pipeline_health',
  'check_process_compliance',
  'detect_anomaly',
  'check_slo_compliance',
]);

/** Number of leading characters compared when deduplicating by a text field. */
const DEDUPE_KEY_LENGTH = 100;

/** Column names built from caller-supplied keys must be plain SQL identifiers. */
const COLUMN_NAME_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;

/**
 * Normalise a context value into a deduplication key.
 *
 * @param {*} value - Candidate value taken from the task context
 * @returns {string|null} - First DEDUPE_KEY_LENGTH characters, or null when
 *   the value is not a non-empty string
 */
function toDedupeKey(value) {
  if (typeof value !== 'string' || value.length === 0) {
    return null;
  }
  return value.substring(0, DEDUPE_KEY_LENGTH);
}

/**
 * Decide which key (and which context_json field) identifies duplicates of a task.
 *
 * @param {string} task_type - Task type
 * @param {Object} context - Task context
 * @returns {{key: string|null, field: string|null}|null} - `field` is null for
 *   monitoring tasks (deduplicated by type alone); the whole result is null
 *   for task types that are allowed to have duplicates
 */
function resolveDedupeTarget(task_type, context) {
  if (MONITORING_TASK_TYPES.has(task_type)) {
    return { key: task_type, field: null };
  }

  switch (task_type) {
    case 'fix_bug':
    case 'classify_error':
      // Dedupe by error_message (first 100 chars for similarity)
      return { key: toDedupeKey(context.error_message), field: 'error_message' };

    case 'design_optimization': {
      // Dedupe by description; fall back to pattern. The queried field must
      // follow the fallback, otherwise a pattern key is compared against the
      // (absent) description column and can never match.
      const description = toDedupeKey(context.description);
      if (description) {
        return { key: description, field: 'description' };
      }
      return { key: toDedupeKey(context.pattern), field: 'pattern' };
    }

    default:
      // For other task types, allow duplicates (features, refactors, etc.)
      return null;
  }
}

/**
 * Find duplicate task to prevent creating duplicates
 *
 * Looks for a task of the same type and assignee that is still active
 * (pending/running/blocked) or was completed and created within the last
 * 5 minutes. For error tasks a fuzzy match is attempted when no exact match
 * exists. Any failure of the lookup is logged and treated as "no duplicate".
 *
 * @param {Object} taskData - Task data to check
 * @param {string} taskData.task_type - Task type
 * @param {string} taskData.assigned_to - Assigned agent
 * @param {Object} taskData.context - Task context
 * @returns {Promise<Object|null>} - Existing task or null
 */
async function findDuplicateTask({ task_type, assigned_to, context }) {
  if (!context) {
    return null;
  }

  // Don't deduplicate retry tasks - they intentionally re-process the same error
  if (
    context.previous_attempt_failed ||
    (context.retry_count !== null && context.retry_count > 0)
  ) {
    return null;
  }

  const target = resolveDedupeTarget(task_type, context);
  if (!target || !target.key) {
    return null;
  }
  const { key, field } = target;

  try {
    // `field` only ever holds one of the literals chosen in
    // resolveDedupeTarget, so interpolating it into the SQL text is safe.
    const fieldClause = field ? `AND LEFT(context_json->>'${field}', 100) = $3` : '';
    const params = field ? [task_type, assigned_to, key] : [task_type, assigned_to];

    const exactMatch = await getOne(
      `SELECT id, status FROM tel.agent_tasks
       WHERE task_type = $1
         AND assigned_to = $2
         AND (
           status IN ('pending', 'running', 'blocked')
           OR (status = 'completed' AND created_at > NOW() - INTERVAL '5 minutes')
         )
         ${fieldClause}
       ORDER BY created_at DESC
       LIMIT 1`,
      params
    );

    if (exactMatch) {
      return exactMatch;
    }

    // Fuzzy matching fallback for errors (70% threshold + Haiku for borderline)
    if (field === 'error_message') {
      return await findFuzzyDuplicate(task_type, assigned_to, key);
    }

    return null;
  } catch (error) {
    // If deduplication check fails, log and allow task creation
    logger.error('Deduplication check failed:', error.message);
    return null;
  }
}

/**
 * Find fuzzy duplicate using Jaccard + Levenshtein (70% threshold),
 * with Haiku LLM fallback for borderline cases (40-70% similarity).
 *
 * Lookback window: 30 minutes for completed tasks (the exact-match query
 * uses 5 minutes). This prevents bounce loops where a triage task completes
 * quickly and the next cron cycle can't see it as a recent duplicate.
 *
 * @param {string} task_type - Task type
 * @param {string} assigned_to - Assigned agent
 * @param {string} errorMessage - Error message (already truncated by the caller)
 * @returns {Promise<{id: number, status: string, similarity: number}|null>} -
 *   Best matching task, or null when nothing matches or the lookup fails
 */
async function findFuzzyDuplicate(task_type, assigned_to, errorMessage) {
  try {
    const candidates = await getAll(
      `SELECT id, status, context_json->>'error_message' as error_msg
       FROM tel.agent_tasks WHERE task_type = $1 AND assigned_to = $2
       AND (status IN ('pending','running','blocked') OR (status='completed' AND created_at > NOW() - INTERVAL '30 minutes'))
       ORDER BY created_at DESC LIMIT 20`,
      [task_type, assigned_to]
    );

    let bestMatch = null;
    let bestSimilarity = 0;
    const borderlineCandidates = [];

    for (const c of candidates) {
      if (!c.error_msg) continue;
      const sim = calculateSimilarity(errorMessage, c.error_msg);
      if (sim >= 0.7 && sim > bestSimilarity) {
        bestSimilarity = sim;
        bestMatch = { id: c.id, status: c.status, similarity: sim };
      } else if (sim >= 0.4 && sim < 0.7) {
        borderlineCandidates.push({ ...c, similarity: sim });
      }
    }

    if (bestMatch) return bestMatch;

    // Haiku fallback: ask LLM to judge semantic equivalence for borderline cases
    if (borderlineCandidates.length > 0) {
      // Check top borderline candidate only (cheapest)
      borderlineCandidates.sort((a, b) => b.similarity - a.similarity);
      const candidate = borderlineCandidates[0];
      try {
        const prompt = `Are these two error messages describing the same underlying issue?

Error 1: ${errorMessage.substring(0, 200)}
Error 2: ${candidate.error_msg.substring(0, 200)}

Reply with exactly one word: YES or NO`;
        const response = await simpleLLMCall('task-manager', null, {
          prompt,
          systemPrompt: 'You are a triage assistant. Reply only YES or NO.',
          maxTokens: 5,
          model: process.env.CLAUDE_HAIKU_MODEL || 'anthropic/claude-haiku-4-5',
        });
        if (response.trim().toUpperCase().startsWith('YES')) {
          logger.info(
            `Haiku dedup: matched "${errorMessage.substring(0, 60)}" to task #${candidate.id} (sim=${candidate.similarity.toFixed(2)})`
          );
          // The LLM confirmed the match; report a fixed similarity above the threshold.
          return { id: candidate.id, status: candidate.status, similarity: 0.85 };
        }
      } catch (haikuErr) {
        // Haiku call failed — log and allow task creation rather than blocking
        logger.warn('Haiku dedup fallback failed:', haikuErr.message);
      }
    }

    return null;
  } catch (e) {
    logger.error('Fuzzy matching failed:', e.message);
    return null;
  }
}

/**
 * Calculate similarity (Jaccard + Levenshtein, 50/50)
 *
 * Both inputs are lower-cased and trimmed first. The Jaccard part compares
 * the sets of whitespace-separated words; the Levenshtein part is
 * 1 - distance / longest length.
 *
 * @param {string} s1 - First string
 * @param {string} s2 - Second string
 * @returns {number} - Similarity in [0, 1]; 1 when both are empty or equal,
 *   0 when exactly one is empty
 */
function calculateSimilarity(s1, s2) {
  if (!s1 && !s2) return 1;
  if (!s1 || !s2) return 0;
  if (s1 === s2) return 1;

  const n1 = s1.toLowerCase().trim();
  const n2 = s2.toLowerCase().trim();

  // Jaccard
  const w1 = new Set(n1.split(/\s+/).filter(w => w.length > 0));
  const w2 = new Set(n2.split(/\s+/).filter(w => w.length > 0));
  const inter = new Set([...w1].filter(w => w2.has(w)));
  const union = new Set([...w1, ...w2]);
  const jac = union.size === 0 ? 0 : inter.size / union.size;

  // Levenshtein
  const dist = levenshtein(n1, n2);
  const maxLen = Math.max(n1.length, n2.length);
  const lev = maxLen === 0 ? 1 : 1 - dist / maxLen;

  return (jac + lev) / 2;
}

/**
 * Decode a JSON column value without ever throwing.
 *
 * @param {*} value - Raw column value
 * @param {string} label - Column name, used only in the warning message
 * @returns {*} - Parsed value; null for empty or malformed input; values that
 *   are not strings are returned unchanged
 */
function parseJsonColumn(value, label) {
  if (!value) {
    return null;
  }
  if (typeof value !== 'string') {
    // NOTE(review): presumably the driver may hand json/jsonb columns back
    // already decoded — in that case there is nothing left to parse.
    return value;
  }
  try {
    return JSON.parse(value);
  } catch (err) {
    // One bad row must not break the whole read; surface it in the log instead.
    logger.warn(`Malformed JSON in ${label}:`, err.message);
    return null;
  }
}

/**
 * Return a copy of a task row with context_json and result_json decoded.
 *
 * @param {Object} task - Raw row from tel.agent_tasks
 * @returns {Object} - Row copy with decoded JSON fields
 */
function hydrateTask(task) {
  return {
    ...task,
    context_json: parseJsonColumn(task.context_json, 'context_json'),
    result_json: parseJsonColumn(task.result_json, 'result_json'),
  };
}

/**
 * Create a new agent task
 *
 * If a duplicate is found (see findDuplicateTask) no row is inserted and the
 * existing task's ID is returned instead.
 *
 * @param {Object} taskData - Task data
 * @param {string} taskData.task_type - Type of task (e.g., 'classify_error', 'fix_bug')
 * @param {string} taskData.assigned_to - Agent to assign to ('developer', 'qa', 'security', etc.)
 * @param {string} [taskData.created_by] - Agent creating the task
 * @param {number} [taskData.priority=5] - Priority (1-10)
 * @param {Object} [taskData.context] - Context object (will be JSON-stringified)
 * @param {number} [taskData.parent_task_id] - Parent task ID for workflow chains
 * @returns {Promise<number>} - ID of created (or already existing) task
 * @throws {Error} When required fields are missing, assigned_to is unknown,
 *   or priority is not a number between 1 and 10
 *
 * @example
 * const taskId = await createAgentTask({
 *   task_type: 'fix_bug',
 *   assigned_to: 'developer',
 *   created_by: 'triage',
 *   priority: 7,
 *   context: { error_type: 'null_pointer', file: 'src/scoring.js' }
 * });
 */
export async function createAgentTask(taskData) {
  const {
    task_type,
    assigned_to,
    created_by = null,
    priority = 5,
    context = null,
    parent_task_id = null,
  } = taskData;

  // Validate required fields
  if (!task_type || !assigned_to) {
    throw new Error('task_type and assigned_to are required');
  }

  // Validate assigned_to
  if (!VALID_AGENTS.includes(assigned_to)) {
    throw new Error(
      `Invalid assigned_to: ${assigned_to}. Must be one of: ${VALID_AGENTS.join(', ')}`
    );
  }

  // Validate priority. A plain range comparison lets NaN and non-numeric
  // strings through (every comparison with NaN is false), so check finiteness too.
  const numericPriority = Number(priority);
  if (!Number.isFinite(numericPriority) || numericPriority < 1 || numericPriority > 10) {
    throw new Error('priority must be between 1 and 10');
  }

  // DEDUPLICATION: Check for existing similar task
  const existingTask = await findDuplicateTask({ task_type, assigned_to, context });
  if (existingTask) {
    logger.info(
      `Task already exists (skipping duplicate): #${existingTask.id} ${task_type} (${existingTask.status})`
    );
    return existingTask.id;
  }

  const result = await run(
    `INSERT INTO tel.agent_tasks (task_type, assigned_to, created_by, priority, context_json, parent_task_id)
     VALUES ($1, $2, $3, $4, $5, $6)
     RETURNING id`,
    [
      task_type,
      assigned_to,
      created_by,
      priority,
      context ? JSON.stringify(context) : null,
      parent_task_id,
    ]
  );

  // NOTE(review): assumes run() exposes the RETURNING id as lastInsertRowid — confirm in utils/db.js
  const taskId = result.lastInsertRowid;

  // Spawn agent immediately if real-time notifications enabled
  if (process.env.AGENT_REALTIME_NOTIFICATIONS !== 'false') {
    spawnAgentAsync(assigned_to, taskId);
  }

  return taskId;
}

/**
 * Get tasks assigned to a specific agent
 *
 * Results are ordered by priority (highest first), then by creation time
 * (oldest first).
 *
 * @param {string} agentName - Agent name
 * @param {string} [status='pending'] - Task status filter
 * @param {number} [limit=5] - Maximum number of tasks to return
 * @returns {Promise<Array>} - Array of task objects with decoded JSON fields
 *
 * @example
 * const tasks = await getAgentTasks('developer', 'pending', 5);
 * for (const task of tasks) {
 *   await agent.processTask(task);
 * }
 */
export async function getAgentTasks(agentName, status = 'pending', limit = 5) {
  const tasks = await getAll(
    `SELECT * FROM tel.agent_tasks
     WHERE assigned_to = $1 AND status = $2
     ORDER BY priority DESC, created_at ASC
     LIMIT $3`,
    [agentName, status, limit]
  );

  // Parse context_json and result_json
  return tasks.map(hydrateTask);
}

/**
 * Update task status
 *
 * Sets started_at when moving to 'running' and completed_at when moving to
 * 'completed' or 'failed', unless the caller supplies those fields itself.
 * The keys 'result' and 'context' are stored JSON-stringified in result_json
 * and context_json; every other key is written to the column of the same name.
 *
 * @param {number} taskId - Task ID
 * @param {string} status - New status ('pending', 'running', 'completed', 'failed', 'blocked')
 * @param {Object} [updates={}] - Additional fields to update
 * @returns {Promise<void>}
 * @throws {Error} When status is unknown or an update key is not a plain identifier
 *
 * @example
 * await updateTaskStatus(123, 'running');
 * await updateTaskStatus(123, 'failed', { error_message: 'API timeout' });
 */
export async function updateTaskStatus(taskId, status, updates = {}) {
  // Validate status
  if (!VALID_STATUSES.includes(status)) {
    throw new Error(`Invalid status: ${status}. Must be one of: ${VALID_STATUSES.join(', ')}`);
  }

  const fields = ['status = $1'];
  const values = [status];
  let paramIdx = 2;

  // Add timestamp based on status
  if (status === 'running' && !updates.started_at) {
    fields.push('started_at = NOW()');
  } else if ((status === 'completed' || status === 'failed') && !updates.completed_at) {
    fields.push('completed_at = NOW()');
  }

  // Add other updates
  for (const [key, value] of Object.entries(updates)) {
    // Keys become column names in the SQL text and cannot be bound as
    // parameters, so refuse anything that is not a plain identifier.
    if (!COLUMN_NAME_PATTERN.test(key)) {
      throw new Error(`Invalid update field: ${key}`);
    }

    if (key === 'result' || key === 'context') {
      // JSON fields
      fields.push(`${key}_json = $${paramIdx}`);
      values.push(JSON.stringify(value));
    } else {
      fields.push(`${key} = $${paramIdx}`);
      values.push(value);
    }
    paramIdx++;
  }

  values.push(taskId);

  await run(
    `UPDATE tel.agent_tasks
     SET ${fields.join(', ')}
     WHERE id = $${paramIdx}`,
    values
  );
}

/**
 * Mark task as running
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<void>}
 *
 * @example
 * await startTask(123);
 */
export async function startTask(taskId) {
  await updateTaskStatus(taskId, 'running');
}

/**
 * Mark task as completed
 *
 * @param {number} taskId - Task ID
 * @param {Object} [result] - Result object (will be JSON-stringified)
 * @returns {Promise<void>}
 *
 * @example
 * await completeTask(123, { files_changed: ['src/scoring.js'], fix_commit: 'abc123' });
 */
export async function completeTask(taskId, result = null) {
  await updateTaskStatus(taskId, 'completed', result ? { result } : {});
}

/**
 * Mark task as failed
 *
 * @param {number} taskId - Task ID
 * @param {string} errorMessage - Error message
 * @param {number} [retryCount] - Current retry count
 * @returns {Promise<void>}
 *
 * @example
 * await failTask(123, 'API timeout after 3 retries', 3);
 */
export async function failTask(taskId, errorMessage, retryCount = null) {
  const updates = { error_message: errorMessage };
  if (retryCount !== null) {
    updates.retry_count = retryCount;
  }
  await updateTaskStatus(taskId, 'failed', updates);
}

/**
 * Mark task as blocked
 *
 * @param {number} taskId - Task ID
 * @param {string} reason - Reason for blocking (e.g., 'Coverage below 80%')
 * @returns {Promise<void>}
 *
 * @example
 * await blockTask(123, 'Coverage below 80% for src/scoring.js (current: 65%)');
 */
export async function blockTask(taskId, reason) {
  await updateTaskStatus(taskId, 'blocked', { error_message: reason });
}

/**
 * Get task by ID
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<Object|null>} - Task object or null if not found.
 *   Malformed JSON columns are returned as null.
 *
 * @example
 * const task = await getTaskById(123);
 * if (task) {
 *   console.log(`Task ${task.id}: ${task.task_type} - ${task.status}`);
 * }
 */
export async function getTaskById(taskId) {
  const task = await getOne('SELECT * FROM tel.agent_tasks WHERE id = $1', [taskId]);

  if (!task) return null;

  return hydrateTask(task);
}

/**
 * Get child tasks of a parent task
 *
 * @param {number} parentTaskId - Parent task ID
 * @returns {Promise<Array>} - Array of child task objects, oldest first
 *
 * @example
 * const children = await getChildTasks(123);
 * console.log(`Task 123 has ${children.length} child tasks`);
 */
export async function getChildTasks(parentTaskId) {
  const tasks = await getAll(
    `SELECT * FROM tel.agent_tasks
     WHERE parent_task_id = $1
     ORDER BY created_at ASC`,
    [parentTaskId]
  );

  return tasks.map(hydrateTask);
}

/**
 * Increment retry count for a task
 *
 * @param {number} taskId - Task ID
 * @returns {Promise<number>} - New retry count (0 when the task does not exist)
 *
 * @example
 * const retryCount = await incrementRetryCount(123);
 * if (retryCount > 3) {
 *   await failTask(123, 'Max retries exceeded');
 * }
 */
export async function incrementRetryCount(taskId) {
  // COALESCE: NULL + 1 is NULL in SQL, which would keep the counter stuck at NULL.
  const result = await getOne(
    `UPDATE tel.agent_tasks
     SET retry_count = COALESCE(retry_count, 0) + 1
     WHERE id = $1
     RETURNING retry_count`,
    [taskId]
  );
  return result ? result.retry_count : 0;
}

/**
 * Check if task exists and is in expected status
 *
 * @param {number} taskId - Task ID
 * @param {string} [expectedStatus] - Expected status to check
 * @returns {Promise<boolean>} - True if task exists (and matches status if provided)
 *
 * @example
 * if (await taskExists(123, 'pending')) {
 *   await startTask(123);
 * }
 */
export async function taskExists(taskId, expectedStatus = null) {
  const task = await getTaskById(taskId);
  if (!task) return false;
  if (expectedStatus && task.status !== expectedStatus) return false;
  return true;
}

/**
 * Get agent task statistics
 *
 * @param {string} agentName - Agent name
 * @param {number} [hours=24] - Hours to look back
 * @returns {Promise<Object>} - Statistics object with integer counts per
 *   status plus success_rate and failure_rate (fractions of total; 0 when
 *   there are no tasks)
 *
 * @example
 * const stats = await getAgentStats('developer', 24);
 * console.log(`Success rate: ${stats.success_rate * 100}%`);
 */
export async function getAgentStats(agentName, hours = 24) {
  const stats = await getOne(
    `SELECT
       COUNT(*) as total,
       SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
       SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
       SUM(CASE WHEN status = 'blocked' THEN 1 ELSE 0 END) as blocked,
       SUM(CASE WHEN status = 'running' THEN 1 ELSE 0 END) as running,
       SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending
     FROM tel.agent_tasks
     WHERE assigned_to = $1
       AND created_at > NOW() - ($2 || ' hours')::interval`,
    [agentName, hours]
  );

  // SUM over zero rows yields NULL, and parseInt(null) is NaN — treat both as 0.
  const toCount = value => Number.parseInt(value ?? '0', 10) || 0;
  const row = stats ?? {};

  const total = toCount(row.total);
  const completed = toCount(row.completed);
  const failed = toCount(row.failed);
  const success_rate = total > 0 ? completed / total : 0;
  const failure_rate = total > 0 ? failed / total : 0;

  return {
    ...row,
    total,
    completed,
    failed,
    blocked: toCount(row.blocked),
    running: toCount(row.running),
    pending: toCount(row.pending),
    success_rate,
    failure_rate,
  };
}

/**
 * Check if an agent is currently running
 *
 * An agent counts as running when tel.agent_state has a 'working' row for it
 * whose last_active is newer than AGENT_LOCK_STALE_MINUTES (default 2).
 *
 * @param {string} agentName - Agent name
 * @returns {Promise<boolean>} - True if agent is actively working
 *
 * @example
 * if (!await isAgentRunning('qa')) {
 *   spawnAgentAsync('qa', taskId);
 * }
 */
export async function isAgentRunning(agentName) {
  const staleMinutes = parseInt(process.env.AGENT_LOCK_STALE_MINUTES || '2', 10);

  const result = await getOne(
    `SELECT status, last_active
     FROM tel.agent_state
     WHERE agent_name = $1
       AND status = 'working'
       AND last_active > NOW() - ($2 || ' minutes')::interval`,
    [agentName, staleMinutes]
  );
  return !!result;
}

/**
 * Spawn agent process immediately to handle new task
 *
 * Fire-and-forget: returns synchronously and never throws for spawn-check
 * failures; those are logged instead.
 *
 * @param {string} agentName - Agent name to spawn
 * @param {number} taskId - Task ID that triggered the spawn
 *
 * @example
 * spawnAgentAsync('developer', 42); // Fire and forget
 */
export function spawnAgentAsync(agentName, taskId) {
  // 1. Load-based throttle: skip if CPU is busy (let existing agents drain first)
  const LOAD_THRESHOLD = parseFloat(process.env.AGENT_SPAWN_LOAD_THRESHOLD || '0.7');
  // cpus() can be empty when CPU info is unavailable; dividing by 0 would give
  // an Infinity load and block every spawn.
  const cpuCount = Math.max(cpus().length, 1);
  const normalizedLoad = loadavg()[0] / cpuCount;
  if (normalizedLoad >= LOAD_THRESHOLD) {
    logger.info(
      `CPU load ${normalizedLoad.toFixed(2)} >= ${LOAD_THRESHOLD}, skipping spawn of ${agentName} (task ${taskId})`
    );
    return;
  }

  // 2. Per-type pool check and global rate limit are async — fire-and-forget
  _spawnAgentAsyncInner(agentName, taskId, normalizedLoad).catch(err => {
    logger.warn(`Spawn check error for ${agentName}:`, err.message);
  });
}

/**
 * Async part of spawnAgentAsync: pool limit, global rate limit, then spawn.
 *
 * NOTE(review): the rate-limit read and the later timestamp write are two
 * separate statements, so two concurrent callers could both pass the check.
 *
 * @param {string} agentName - Agent name to spawn
 * @param {number} taskId - Task ID that triggered the spawn (used for logging)
 * @param {number} normalizedLoad - Load average per CPU, used for logging
 * @returns {Promise<void>}
 */
async function _spawnAgentAsyncInner(agentName, taskId, normalizedLoad) {
  // Per-type pool check: enforce max concurrent instances per agent type
  const staleMinutes = parseInt(process.env.AGENT_LOCK_STALE_MINUTES || '2', 10);
  const POOL_DEFAULTS = { developer: 3, qa: 3 };
  const envKey = `AGENT_MAX_${agentName.toUpperCase()}`;
  const typeMax = parseInt(process.env[envKey] ?? POOL_DEFAULTS[agentName] ?? '1', 10);

  const countRow = await getOne(
    `SELECT COUNT(*) as count FROM tel.agent_state
     WHERE agent_name = $1
       AND status = 'working'
       AND last_active > NOW() - ($2 || ' minutes')::interval`,
    [agentName, staleMinutes]
  );
  const runningCount = parseInt(countRow?.count || '0', 10);
  if (runningCount >= typeMax) {
    logger.info(
      `Agent ${agentName} at pool limit (${runningCount}/${typeMax}), skipping spawn (task ${taskId})`
    );
    return;
  }

  // Global rate limit: max 1 new agent spawn per minute across all types
  const RATE_LOCK_KEY = 'agent_spawn_ratelimit';
  const RATE_INTERVAL_S = parseInt(process.env.AGENT_SPAWN_INTERVAL_S || '60', 10);

  const lastSpawn = await getOne('SELECT updated_at FROM ops.cron_locks WHERE lock_key = $1', [
    RATE_LOCK_KEY,
  ]);
  if (lastSpawn) {
    const secondsSince = (Date.now() - new Date(lastSpawn.updated_at).getTime()) / 1000;
    if (secondsSince < RATE_INTERVAL_S) {
      logger.info(
        `Spawn rate limit: last spawn was ${Math.round(secondsSince)}s ago, skipping ${agentName} (task ${taskId})`
      );
      return;
    }
  }

  try {
    logger.info(
      `Spawning agent ${agentName} for task ${taskId} (load: ${normalizedLoad.toFixed(2)})`
    );

    // Record spawn timestamp for rate limiting
    await run(
      `INSERT INTO ops.cron_locks (lock_key, description, updated_at)
       VALUES ($1, 'Global agent spawn rate limiter (1/min)', NOW())
       ON CONFLICT (lock_key) DO UPDATE SET
         description = EXCLUDED.description,
         updated_at = EXCLUDED.updated_at`,
      [RATE_LOCK_KEY]
    );

    // Spawn agent at low priority. NOT detached — parent waits for child so Node.js
    // can reap it on exit (prevents zombie accumulation when codium is PID 1).
    const child = spawn('nice', ['-n', '19', 'npm', 'run', 'agent:run:single', agentName], {
      stdio: 'ignore',
      cwd: process.cwd(),
      env: process.env,
    });

    child.on('exit', (code, signal) => {
      logger.info(`Agent ${agentName} (task ${taskId}) exited: code=${code} signal=${signal}`);
    });

    child.on('error', err => {
      logger.warn(`Agent ${agentName} spawn error: ${err.message}`);
    });
  } catch (error) {
    logger.error(`Failed to spawn agent ${agentName}:`, error);
    // Don't throw - fallback to scheduled polling
  }
}