runner.js
/**
 * Agent Runner
 *
 * Orchestrates agent task execution by polling each agent.
 * Supports both sequential and parallel execution with autoscaling.
 * Designed to be run via cron (every 5 minutes) or manually via CLI.
 */

import { MonitorAgent } from './monitor.js';
import { TriageAgent } from './triage.js';
import { DeveloperAgent } from './developer.js';
import { QAAgent } from './qa.js';
import { SecurityAgent } from './security.js';
import { ArchitectAgent } from './architect.js';
import Logger from '../utils/logger.js';
import { run, getOne, getAll } from '../utils/db.js';
import { execSync } from 'child_process';
import os from 'os';
import { isScreenActive } from '../utils/adaptive-concurrency.js';

const logger = new Logger('AgentRunner');

/**
 * Run one cycle of agent task processing
 *
 * Order matters:
 * 1. Monitor   - Scans logs, creates tasks for other agents
 * 2. Triage    - Routes newly created error classification tasks
 * 3. Developer - Fixes bugs, implements features
 * 4. QA        - Verifies fixes, enforces coverage gates
 * 5. Security  - Security reviews
 * 6. Architect - Design review, doc freshness
 *
 * @param {Object} options - Execution options
 * @param {number} [options.tasksPerAgent=5] - Max tasks per agent per cycle
 * @param {boolean} [options.verbose=false] - Log detailed execution info
 * @returns {Promise<Object>} - Summary of tasks processed
 */
export async function runAgentCycle(options = {}) {
  const { tasksPerAgent = 5, verbose = false } = options;

  // Check if agent system is enabled
  const enabled = process.env.AGENT_SYSTEM_ENABLED !== 'false';
  if (!enabled) {
    logger.info('Agent system disabled (AGENT_SYSTEM_ENABLED=false)');
    return { enabled: false, processed: 0 };
  }

  logger.info('Starting agent cycle', { tasksPerAgent });

  const agents = [
    new MonitorAgent(),
    new TriageAgent(),
    new DeveloperAgent(),
    new QAAgent(),
    new SecurityAgent(),
    new ArchitectAgent(),
  ];

  const summary = {
    enabled: true,
    timestamp: new Date().toISOString(),
    agents: {},
    total_processed: 0,
    errors: [],
  };

  for (const agent of agents) {
    try {
      const startTime = Date.now();
      const processed = await agent.pollTasks(tasksPerAgent);
      const duration = Date.now() - startTime;

      summary.agents[agent.agentName] = {
        processed,
        duration_ms: duration,
        status: 'success',
      };
      summary.total_processed += processed;

      if (verbose || processed > 0) {
        logger.info(`Agent ${agent.agentName} completed`, {
          processed,
          duration_ms: duration,
        });
      }
    } catch (error) {
      logger.error(`Agent ${agent.agentName} failed`, {
        error: error.message,
        stack: error.stack,
      });

      summary.agents[agent.agentName] = {
        processed: 0,
        status: 'failed',
        error: error.message,
      };
      summary.errors.push({
        agent: agent.agentName,
        error: error.message,
      });

      // Continue with other agents even if one fails
    }
  }

  logger.info('Agent cycle complete', {
    total_processed: summary.total_processed,
    errors: summary.errors.length,
  });

  return summary;
}
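// Illustrative usage (a sketch, not invoked anywhere in this module): a
// caller wanting a single sequential pass could do something like
//
//   import { runAgentCycle } from './agents/runner.js';
//   const summary = await runAgentCycle({ tasksPerAgent: 3, verbose: true });
//   if (summary.errors.length > 0) {
//     // inspect summary.agents[name].error for per-agent failures
//   }
//
// The import path is an assumption; adjust to wherever runner.js lives.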
/**
 * Get agent system statistics
 *
 * @returns {Promise<Object>} - Statistics for all agents
 */
export async function getAgentStats() {
  const stats = await getAll(
    `SELECT
       assigned_to as agent,
       COUNT(*) as total,
       SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
       SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed,
       SUM(CASE WHEN status = 'blocked' THEN 1 ELSE 0 END) as blocked,
       SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pending,
       SUM(CASE WHEN status = 'running' THEN 1 ELSE 0 END) as running,
       AVG(CASE WHEN status = 'completed' THEN
         EXTRACT(EPOCH FROM (completed_at - started_at)) / 60
       ELSE NULL END) as avg_completion_time_minutes
     FROM tel.agent_tasks
     WHERE created_at > NOW() - INTERVAL '24 hours'
     GROUP BY assigned_to`
  );

  const overall = await getOne(
    `SELECT
       COUNT(*) as total,
       SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) as completed,
       SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failed
     FROM tel.agent_tasks
     WHERE created_at > NOW() - INTERVAL '24 hours'`
  );

  // COUNT/SUM come back from the driver as strings, hence the parseInt
  // coercion before computing rates.
  const mapRow = s => {
    const total = parseInt(s.total, 10);
    const completed = parseInt(s.completed, 10);
    const failed = parseInt(s.failed, 10);
    return {
      ...s,
      total,
      completed,
      failed,
      success_rate: total > 0 ? completed / total : 0,
      failure_rate: total > 0 ? failed / total : 0,
    };
  };

  const overallTotal = parseInt(overall.total, 10);
  const overallCompleted = parseInt(overall.completed, 10);
  const overallFailed = parseInt(overall.failed, 10);

  return {
    agents: stats.map(mapRow),
    overall: {
      ...overall,
      total: overallTotal,
      completed: overallCompleted,
      failed: overallFailed,
      success_rate: overallTotal > 0 ? overallCompleted / overallTotal : 0,
      failure_rate: overallTotal > 0 ? overallFailed / overallTotal : 0,
    },
  };
}
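// For reference, the resolved shape looks roughly like this (values are
// illustrative, not real data):
//
//   {
//     agents: [
//       { agent: 'developer', total: 12, completed: 10, failed: 2,
//         blocked: 0, pending: 0, running: 0,
//         avg_completion_time_minutes: 4.2,
//         success_rate: 0.833, failure_rate: 0.167 },
//     ],
//     overall: { total: 12, completed: 10, failed: 2,
//                success_rate: 0.833, failure_rate: 0.167 },
//   }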
/**
 * Check circuit breaker status for agents
 *
 * Disables agents whose failure rate exceeds the threshold (default 30%)
 * and auto-recovers them after a cooldown period.
 *
 * @returns {Promise<Array>} - List of newly blocked agents
 */
export async function checkCircuitBreakers() {
  const threshold = parseFloat(process.env.AGENT_CIRCUIT_BREAKER_THRESHOLD || '0.3');
  const cooldownMinutes = parseInt(process.env.AGENT_CIRCUIT_BREAKER_COOLDOWN || '30', 10);
  const stats = await getAgentStats();
  const blocked = [];

  for (const agent of stats.agents) {
    // Check if agent is currently blocked
    const agentState = await getOne(
      'SELECT status, metrics_json FROM tel.agent_state WHERE agent_name = $1',
      [agent.agent]
    );

    if (agentState?.status === 'blocked') {
      // Check if auto-recovery conditions are met
      const metrics = agentState.metrics_json ? JSON.parse(agentState.metrics_json) : {};
      const triggeredAt = metrics.circuit_breaker_triggered_at
        ? new Date(metrics.circuit_breaker_triggered_at)
        : null;

      if (triggeredAt) {
        const cooldownExpired = Date.now() - triggeredAt.getTime() >= cooldownMinutes * 60 * 1000;
        const failureRateDropped = agent.failure_rate <= threshold;

        if (cooldownExpired) {
          // Auto-recover: Reset circuit breaker
          logger.info(`Auto-recovering circuit breaker for ${agent.agent}`, {
            cooldown_minutes: cooldownMinutes,
            current_failure_rate: agent.failure_rate,
            threshold,
            blocked_duration_minutes: Math.floor((Date.now() - triggeredAt.getTime()) / 60000),
          });

          // Note: circuit_breaker_triggered_at is reset to NOW() on recovery
          // so the cooldown check below suppresses an immediate re-trigger.
          await run(
            `UPDATE tel.agent_state
             SET status = 'idle',
                 metrics_json = metrics_json
                   || jsonb_build_object(
                     'circuit_breaker_recovered_at', NOW()::text,
                     'circuit_breaker_triggered_at', NOW()::text,
                     'auto_recovered', true
                   )
             WHERE agent_name = $1`,
            [agent.agent]
          );

          // Log recovery
          await run(
            `INSERT INTO tel.agent_logs (agent_name, log_level, message, data_json, created_at)
             VALUES ($1, 'info', $2, $3, NOW())`,
            [
              agent.agent,
              'Circuit breaker auto-recovered',
              JSON.stringify({
                failure_rate: agent.failure_rate,
                threshold,
                cooldown_minutes: cooldownMinutes,
                blocked_duration_minutes: Math.floor((Date.now() - triggeredAt.getTime()) / 60000),
              }),
            ]
          );

          continue; // Skip blocking this agent
        } else {
          logger.debug(`Circuit breaker still blocked for ${agent.agent}`, {
            cooldown_expired: cooldownExpired,
            failure_rate_dropped: failureRateDropped,
            current_failure_rate: agent.failure_rate,
            threshold,
          });
        }
      }
    }

    // Check if we need to trigger the circuit breaker.
    // Skip if within the cooldown window of a previous trigger (prevents
    // immediate re-trigger after recovery).
    const currentMetrics = agentState?.metrics_json ? JSON.parse(agentState.metrics_json) : {};
    const lastTriggeredAt = currentMetrics.circuit_breaker_triggered_at
      ? new Date(currentMetrics.circuit_breaker_triggered_at)
      : null;
    const withinCooldown =
      lastTriggeredAt && Date.now() - lastTriggeredAt.getTime() < cooldownMinutes * 60 * 1000;

    if (agent.failure_rate > threshold && agent.total >= 10 && !withinCooldown) {
      // Minimum 10 tasks to trigger circuit breaker
      logger.error(`Circuit breaker triggered for ${agent.agent}`, {
        failure_rate: agent.failure_rate,
        threshold,
        total_tasks: agent.total,
      });

      // Update agent state to blocked
      await run(
        `UPDATE tel.agent_state
         SET status = 'blocked',
             metrics_json = metrics_json
               || jsonb_build_object(
                 'circuit_breaker_triggered_at', NOW()::text,
                 'failure_rate', $1::text
               )
         WHERE agent_name = $2`,
        [agent.failure_rate, agent.agent]
      );

      blocked.push({
        agent: agent.agent,
        failure_rate: agent.failure_rate,
        total_tasks: agent.total,
      });
    }
  }

  return blocked;
}
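// Worked example of the breaker lifecycle with the defaults (threshold 0.3,
// cooldown 30 min); timestamps illustrative:
//
//   t+0   developer hits 4 failures / 10 tasks (40% > 30%) → status 'blocked',
//         circuit_breaker_triggered_at = t+0
//   t+15  still within cooldown → stays blocked, debug log only
//   t+30  cooldown expired → auto-recover to 'idle'; triggered_at resets to
//         t+30, so the cooldown check suppresses re-triggering until t+60
//         even if the 24h failure rate is still above the threshold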
/**
 * Calculate task limit with autoscaling
 *
 * Scales up if the queue is backing up; scales down if the system is under load.
 *
 * @param {string} agentName - Agent name
 * @param {number} queueDepth - Current queue depth for this agent
 * @returns {number} - Task limit for this cycle
 */
function calculateTaskLimit(agentName, queueDepth) {
  const baseLimit = parseInt(process.env.AGENT_MAX_TASKS_PER_CYCLE || '5', 10);
  const maxLimit = 20;

  // Memory guard: drop to minimal concurrency (or suspend) when RAM is
  // critically low. Check this FIRST — memory pressure causes swap thrashing,
  // which makes everything slower and creates orphaned git processes → zombies.
  const freeMemMB = os.freemem() / 1024 / 1024;
  if (freeMemMB < 512) {
    logger.warn(
      `Critical memory (${freeMemMB.toFixed(0)}MB free) → suspending ${agentName} (min=0)`
    );
    return 0;
  }
  if (freeMemMB < 1024) {
    logger.warn(`Low memory (${freeMemMB.toFixed(0)}MB free) → throttling ${agentName} to 1 task`);
    return 1;
  }

  // Load guard: check BEFORE queue-depth scaling so high load is always respected.
  // Bug fix: previously queue-depth > 20 bypassed this check entirely.
  // When the screen is on (user present), use tighter thresholds to keep
  // the desktop responsive; when the screen is off (AFK), run at full throttle.
  const loadAvg = os.loadavg()[0];
  const cpuCount = os.cpus().length;
  const normalizedLoad = loadAvg / cpuCount;
  const screenOn = isScreenActive();
  const suspendThreshold = screenOn ? 1.0 : 1.5;
  const throttleThreshold = screenOn ? 0.5 : 0.8;

  if (normalizedLoad > suspendThreshold) {
    // Severely overloaded → suspend agents entirely. Spawning more work while
    // the system is thrashing makes things worse, not better.
    logger.warn(
      `Severely overloaded (${loadAvg.toFixed(1)}/${cpuCount} = ${normalizedLoad.toFixed(1)}x, screen ${screenOn ? 'on' : 'off'}) → suspending ${agentName}`
    );
    return 0;
  }

  if (normalizedLoad > throttleThreshold) {
    const throttledLimit = Math.max(1, Math.floor(baseLimit / 2));
    logger.warn(
      `Under load (${loadAvg.toFixed(1)}/${cpuCount} = ${normalizedLoad.toFixed(1)}x, screen ${screenOn ? 'on' : 'off'}) → throttling ${agentName} to ${throttledLimit} tasks`
    );
    return throttledLimit;
  }

  // Scale up based on queue depth only when the system is healthy.
  if (queueDepth > 20) {
    logger.info(`Autoscaling ${agentName}: queue depth ${queueDepth} → ${maxLimit} tasks`);
    return maxLimit;
  }

  if (queueDepth > 10) {
    const scaledLimit = Math.min(baseLimit * 2, maxLimit);
    logger.info(`Autoscaling ${agentName}: queue depth ${queueDepth} → ${scaledLimit} tasks`);
    return scaledLimit;
  }

  return baseLimit;
}
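// Worked example of the load guard (numbers illustrative): on an 8-core box
// with a 1-minute loadavg of 5.2, normalizedLoad = 5.2 / 8 = 0.65. With the
// screen on, that exceeds throttleThreshold (0.5), so with the default
// baseLimit of 5 the agent gets Math.max(1, Math.floor(5 / 2)) = 2 tasks.
// With the screen off, the threshold is 0.8, so the same load runs the full
// baseLimit (queue-depth scaling permitting).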
/**
 * Get queue depth for an agent
 *
 * @param {string} agentName - Agent name
 * @returns {Promise<number>} - Number of pending tasks
 */
async function getQueueDepth(agentName) {
  const result = await getOne(
    `SELECT COUNT(*) as count FROM tel.agent_tasks WHERE assigned_to = $1 AND status = $2`,
    [agentName, 'pending']
  );
  return parseInt(result?.count || '0', 10);
}

/**
 * Run agents in parallel with autoscaling
 *
 * @param {Object} options - Execution options
 * @returns {Promise<Object>} - Summary of tasks processed
 */
async function runAgentCycleParallel(options = {}) {
  const { verbose = false } = options;

  logger.info('Starting parallel agent cycle with autoscaling');

  const agents = [
    new MonitorAgent(),
    new TriageAgent(),
    new DeveloperAgent(),
    new QAAgent(),
    new SecurityAgent(),
    new ArchitectAgent(),
  ];

  const summary = {
    enabled: true,
    timestamp: new Date().toISOString(),
    agents: {},
    total_processed: 0,
    errors: [],
  };

  // Run all agents in parallel. allSettled (rather than all) so one agent's
  // rejection doesn't discard the other agents' results.
  const results = await Promise.allSettled(
    agents.map(async agent => {
      const startTime = Date.now();

      // Autoscale based on queue depth
      const queueDepth = await getQueueDepth(agent.agentName);
      const taskLimit = calculateTaskLimit(agent.agentName, queueDepth);

      const processed = await agent.pollTasks(taskLimit);
      const duration = Date.now() - startTime;

      return {
        agentName: agent.agentName,
        processed,
        duration,
        queueDepth,
        taskLimit,
      };
    })
  );

  // Process results. allSettled preserves input order, so index i maps back
  // to agents[i].
  for (const [i, result] of results.entries()) {
    if (result.status === 'fulfilled') {
      const { agentName, processed, duration, queueDepth, taskLimit } = result.value;

      summary.agents[agentName] = {
        processed,
        duration_ms: duration,
        queue_depth: queueDepth,
        task_limit: taskLimit,
        status: 'success',
      };
      summary.total_processed += processed;

      if (verbose || processed > 0) {
        logger.info(`Agent ${agentName} completed`, {
          processed,
          duration_ms: duration,
          queue_depth: queueDepth,
          task_limit: taskLimit,
        });
      }
    } else {
      const agentName = agents[i]?.agentName || 'unknown';
      const error = result.reason;

      logger.error(`Agent ${agentName} failed`, {
        error: error.message,
        stack: error.stack,
      });

      summary.agents[agentName] = {
        processed: 0,
        status: 'failed',
        error: error.message,
      };
      summary.errors.push({
        agent: agentName,
        error: error.message,
      });
    }
  }

  logger.info('Parallel agent cycle complete', {
    total_processed: summary.total_processed,
    errors: summary.errors.length,
  });

  return summary;
}
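// If one agent rejects, the others still land in the summary. Sketch of the
// resulting entries (agent name and error text illustrative):
//
//   summary.agents.qa = { processed: 0, status: 'failed',
//                         error: 'pollTasks timed out' };
//   summary.errors    = [{ agent: 'qa', error: 'pollTasks timed out' }];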
/**
 * Check for nightmare scenarios
 *
 * Detects and prevents cost runaway, task loops, and system degradation.
 *
 * @returns {Promise<Array>} - List of alerts
 */
async function checkNightmareScenarios() {
  const alerts = [];

  // 1. Cost runaway (>$1/hour). Cost is estimated from agent initializations
  // at a flat $0.015 per run.
  const hourlyCost = await getOne(
    `SELECT COUNT(*) * 0.015 as cost_usd
     FROM tel.agent_logs
     WHERE created_at > NOW() - INTERVAL '1 hour'
       AND log_level = 'info'
       AND message LIKE '%agent initialized%'`
  );

  // Coerce once; the driver may return the computed column as a string.
  const costUsd = parseFloat(hourlyCost?.cost_usd || '0');
  if (costUsd > 1.0) {
    alerts.push({
      type: 'cost_runaway',
      severity: 'critical',
      message: `High cost detected: $${costUsd.toFixed(2)}/hour`,
      action: 'Review agent task creation logic',
    });

    // Emergency shutdown if >$5/hour
    if (costUsd > 5.0) {
      logger.error('EMERGENCY: Disabling agent system due to runaway costs', {
        cost_per_hour: costUsd,
      });
      process.env.AGENT_SYSTEM_ENABLED = 'false';
    }
  }

  // 2. Task loop detection (same error context created >5x in 1 hour)
  const loopingErrors = await getAll(
    `SELECT context_json, COUNT(*) as count
     FROM tel.agent_tasks
     WHERE task_type = 'fix_bug'
       AND created_at > NOW() - INTERVAL '1 hour'
     GROUP BY context_json
     HAVING COUNT(*) > 5`
  );

  if (loopingErrors.length > 0) {
    alerts.push({
      type: 'task_loop',
      severity: 'high',
      message: `Detected ${loopingErrors.length} looping error(s)`,
      action: 'Review error classification and fix logic',
    });
  }

  // 3. Circuit breaker cascade (multiple agents blocked)
  const blockedAgents = await getOne(
    `SELECT COUNT(*) as count FROM tel.agent_state WHERE status = $1`,
    ['blocked']
  );

  if (parseInt(blockedAgents.count, 10) >= 3) {
    alerts.push({
      type: 'circuit_breaker_cascade',
      severity: 'critical',
      message: `${blockedAgents.count} agents blocked - system degraded`,
      action: 'Manual intervention required - reset circuit breakers',
    });
  }

  // 4. Stale task buildup (>50 tasks pending >1 hour)
  const staleTasks = await getOne(
    `SELECT COUNT(*) as count
     FROM tel.agent_tasks
     WHERE status = 'pending'
       AND created_at < NOW() - INTERVAL '1 hour'`
  );

  if (parseInt(staleTasks.count, 10) > 50) {
    alerts.push({
      type: 'stale_task_buildup',
      severity: 'high',
      message: `${staleTasks.count} tasks stale (pending >1 hour)`,
      action: 'Check for blocked workflows or agent failures',
    });
  }

  // Log all alerts
  for (const alert of alerts) {
    logger[alert.severity === 'critical' ? 'error' : 'warn'](
      `Nightmare scenario detected: ${alert.type}`,
      alert
    );
  }

  return alerts;
}
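// Example of an alert this produces (message text mirrors the template
// above; the count is illustrative):
//
//   { type: 'stale_task_buildup', severity: 'high',
//     message: '73 tasks stale (pending >1 hour)',
//     action: 'Check for blocked workflows or agent failures' }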
/**
 * Create summary commit on the autofix branch
 *
 * @param {Object} summary - Agent cycle summary
 * @returns {string|null} - Commit hash, or null if nothing to commit
 */
function createSummaryCommit(summary) {
  const autoCommit = process.env.AGENT_AUTO_COMMIT === 'true';
  const branch = process.env.AGENT_AUTO_COMMIT_BRANCH || 'autofix';

  if (!autoCommit) {
    logger.debug('Auto-commit disabled (AGENT_AUTO_COMMIT=false)');
    return null;
  }

  try {
    // Check if there are any changes
    const status = execSync('git status --porcelain', { encoding: 'utf8', timeout: 15000 }).trim();
    if (!status) {
      logger.debug('No changes to commit');
      return null;
    }

    // Ensure we're on the autofix branch
    const currentBranch = execSync('git rev-parse --abbrev-ref HEAD', {
      encoding: 'utf8',
      timeout: 15000,
    }).trim();

    if (currentBranch !== branch) {
      // Try to switch to the autofix branch
      try {
        execSync(`git checkout -B ${branch}`, { encoding: 'utf8', timeout: 15000 });
        logger.info(`Switched to ${branch} branch`);
      } catch (err) {
        logger.error(`Failed to switch to ${branch} branch`, { error: err.message });
        return null;
      }
    }

    // Build commit message
    const timestamp = new Date().toISOString().replace('T', ' ').substring(0, 19);
    const agentDetails = Object.entries(summary.agents)
      .map(
        ([agent, data]) =>
          `- ${agent.padEnd(12)}: ${data.processed} tasks (${data.duration_ms}ms)${data.error ? ` [ERROR: ${data.error}]` : ''}`
      )
      .join('\n');

    const commitMessage = `Agent cycle ${timestamp} UTC

Summary:
${agentDetails}

Total: ${summary.total_processed} tasks processed
${summary.errors.length > 0 ? `Errors: ${summary.errors.length}` : ''}

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>`;

    // Stage all changes
    execSync('git add -A', { encoding: 'utf8', timeout: 15000 });

    // Commit. Pass the message on stdin (-F -) rather than interpolating it
    // into the shell command, so newlines and shell metacharacters survive.
    execSync('git commit -F -', {
      input: commitMessage,
      encoding: 'utf8',
      timeout: 30000,
    });

    const commitHash = execSync('git rev-parse --short HEAD', {
      encoding: 'utf8',
      timeout: 15000,
    }).trim();

    logger.info('Summary commit created', {
      branch,
      commit: commitHash,
      files_changed: status.split('\n').length,
    });

    // Push if enabled
    if (process.env.AGENT_AUTO_PUSH === 'true') {
      try {
        execSync(`git push -u origin ${branch}`, { encoding: 'utf8', timeout: 60000 });
        logger.info(`Pushed ${branch} branch to origin`);
      } catch (err) {
        logger.warn('Failed to push branch', { error: err.message });
      }
    }

    return commitHash;
  } catch (error) {
    logger.error('Failed to create summary commit', {
      error: error.message,
      stack: error.stack,
    });
    return null;
  }
}
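// Example of the generated commit message (agent figures illustrative):
//
//   Agent cycle 2025-01-15 14:35:02 UTC
//
//   Summary:
//   - monitor     : 2 tasks (1843ms)
//   - developer   : 1 tasks (45210ms) [ERROR: patch failed]
//
//   Total: 3 tasks processed
//   Errors: 1
//
//   Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>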
/**
 * Run complete agent cycle with all safety features
 *
 * @param {Object} options - Execution options
 * @returns {Promise<Object>} - Summary with alerts and commit info
 */
export async function runCompleteAgentCycle(options = {}) {
  // Self-renice + ionice: agents are background maintenance, not user-facing
  // work. Yields CPU and I/O to the pipeline and VSCode without systemd
  // config changes.
  try {
    os.setPriority(process.pid, 19);
  } catch {
    // setPriority may fail in restricted environments — non-fatal
  }
  try {
    // ionice class 2 level 7 = best-effort, lowest priority.
    // Not class 3 (idle), since agents need to make progress; class 2 level 7
    // yields I/O to anything else that wants it but isn't completely blocked.
    execSync(`ionice -c 2 -n 7 -p ${process.pid} 2>/dev/null || true`, {
      timeout: 3000,
      shell: '/bin/sh',
    });
  } catch {
    // ionice may not be available — non-fatal
  }

  // Support both old and new env var names (backwards compatibility)
  const parallel =
    process.env.AGENT_RUN_TYPES_IN_PARALLEL === 'true' ||
    process.env.AGENT_PARALLEL_EXECUTION === 'true';

  // Check for nightmare scenarios before running
  const preAlerts = await checkNightmareScenarios();
  if (preAlerts.some(a => a.severity === 'critical')) {
    logger.error('Critical alerts detected - aborting agent cycle');
    return {
      enabled: false,
      aborted: true,
      alerts: preAlerts,
    };
  }

  // Run agent cycle (parallel or sequential)
  const summary = parallel ? await runAgentCycleParallel(options) : await runAgentCycle(options);

  // Check circuit breakers
  const blockedAgents = await checkCircuitBreakers();
  if (blockedAgents.length > 0) {
    summary.circuit_breakers = blockedAgents;
  }

  // Check for nightmare scenarios after running
  const postAlerts = await checkNightmareScenarios();
  if (postAlerts.length > 0) {
    summary.alerts = postAlerts;
  }

  // Create summary commit if auto-commit enabled
  const commitHash = createSummaryCommit(summary);
  if (commitHash) {
    summary.commit = commitHash;
    summary.branch = process.env.AGENT_AUTO_COMMIT_BRANCH || 'autofix';
  }

  return summary;
}
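// Typical cron wiring per the header comment (paths are assumptions; adjust
// the node binary and repo location to the actual deployment):
//
//   */5 * * * * cd /path/to/repo && node agents/runner.js >> /var/log/agent-runner.log 2>&1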
/**
 * CLI entry point when run directly
 */
if (import.meta.url === `file://${process.argv[1]}`) {
  const args = process.argv.slice(2);
  const verbose = args.includes('--verbose') || args.includes('-v');
  const tasksPerAgent =
    parseInt(args.find(arg => arg.startsWith('--tasks='))?.split('=')[1], 10) || 5;

  runCompleteAgentCycle({ tasksPerAgent, verbose })
    .then(summary => {
      console.log('\nAgent Cycle Summary:');
      console.log(`Total tasks processed: ${summary.total_processed || 0}`);

      if (summary.aborted) {
        console.log('\nCycle aborted due to critical alerts');
      }

      console.log('\nBy agent:');
      for (const [agent, data] of Object.entries(summary.agents || {})) {
        console.log(`  ${agent}: ${data.processed} tasks (${data.duration_ms}ms)`);
        if (data.queue_depth !== undefined) {
          console.log(`    Queue: ${data.queue_depth}, Limit: ${data.task_limit}`);
        }
        if (data.error) {
          console.log(`    ERROR: ${data.error}`);
        }
      }

      if (summary.commit) {
        console.log(`\nSummary commit created: ${summary.commit} (${summary.branch})`);
      }

      if (summary.alerts && summary.alerts.length > 0) {
        console.log('\nAlerts:');
        for (const alert of summary.alerts) {
          console.log(`  ${alert.severity.toUpperCase()}: ${alert.message}`);
          console.log(`    Action: ${alert.action}`);
        }
      }

      if (summary.circuit_breakers && summary.circuit_breakers.length > 0) {
        console.log('\nCircuit Breakers Triggered:');
        for (const cb of summary.circuit_breakers) {
          console.log(`  ${cb.agent}: ${(cb.failure_rate * 100).toFixed(1)}% failure rate`);
        }
      }

      if (summary.errors && summary.errors.length > 0) {
        console.log(`\n${summary.errors.length} agent(s) failed`);
        process.exit(1);
      } else {
        console.log('\nAll agents completed successfully');
        process.exit(0);
      }
    })
    .catch(error => {
      console.error('Fatal error in agent runner:', error);
      process.exit(1);
    });
}