/ crates / acdc / src / commands / monitor.rs
monitor.rs
  1  //! Monitoring commands.
  2  
  3  use acdc_monitor::{self, AlertConfig, MonitorConfig};
  4  use anyhow::Result;
  5  
  6  /// Start the metrics server.
  7  pub async fn start_server(port: Option<u16>, _node_id: Option<String>) -> Result<()> {
  8      let config = MonitorConfig {
  9          enabled: true,
 10          prometheus_port: port.unwrap_or(9100),
 11          health_port: port.map(|p| p + 1).unwrap_or(9101),
 12          collection_interval_secs: 15,
 13          alerts: AlertConfig::default(),
 14      };
 15  
 16      println!(
 17          "Starting metrics server on port {}...",
 18          config.prometheus_port
 19      );
 20      println!(
 21          "  Prometheus endpoint: http://0.0.0.0:{}/metrics",
 22          config.prometheus_port
 23      );
 24      println!(
 25          "  Health endpoint: http://0.0.0.0:{}/health",
 26          config.prometheus_port
 27      );
 28      println!();
 29      println!("Press Ctrl+C to stop.");
 30  
 31      acdc_monitor::start(&config).await?;
 32      Ok(())
 33  }
 34  
 35  /// Show current metrics.
 36  pub async fn show_metrics(node_id: &str, json: bool) -> Result<()> {
 37      let metrics = acdc_monitor::collect_metrics(node_id).await?;
 38  
 39      if json {
 40          println!("{}", serde_json::to_string_pretty(&metrics)?);
 41      } else {
 42          println!("Node Metrics for '{}'", node_id);
 43          println!("─────────────────────────────────────");
 44          println!("Block Height:    {}", metrics.block_height);
 45          println!("Peer Count:      {}", metrics.peer_count);
 46          println!("Sync Progress:   {:.2}%", metrics.sync_progress * 100.0);
 47          println!("CPU Usage:       {:.1}%", metrics.cpu_percent);
 48          println!(
 49              "Memory Usage:    {:.2} GB",
 50              metrics.memory_usage as f64 / 1_000_000_000.0
 51          );
 52          println!("Disk Usage:      {:.1}%", metrics.disk_usage_percent);
 53          println!("Uptime:          {} seconds", metrics.uptime_secs);
 54          println!();
 55          println!("Network I/O:");
 56          println!("  Sent:     {} bytes", metrics.network_io.bytes_sent);
 57          println!("  Received: {} bytes", metrics.network_io.bytes_recv);
 58      }
 59  
 60      Ok(())
 61  }
 62  
 63  /// Check for alerts.
 64  pub async fn check_alerts(node_id: &str, json: bool) -> Result<()> {
 65      let metrics = acdc_monitor::collect_metrics(node_id).await?;
 66      let config = AlertConfig::default();
 67      let alerts = acdc_monitor::check_alerts(&metrics, &config).await;
 68  
 69      if json {
 70          println!("{}", serde_json::to_string_pretty(&alerts)?);
 71      } else if alerts.is_empty() {
 72          println!("No active alerts.");
 73      } else {
 74          println!("Active Alerts ({}):", alerts.len());
 75          println!();
 76          print!("{}", acdc_monitor::alerts::format_alerts(&alerts));
 77      }
 78  
 79      Ok(())
 80  }
 81  
 82  /// Test alert configuration.
 83  pub async fn test_alerts() -> Result<()> {
 84      println!("Testing alert configuration...");
 85  
 86      // Create test metrics that trigger alerts
 87      let test_metrics = acdc_monitor::NodeMetrics {
 88          disk_usage_percent: 92.0, // Above default 90% threshold
 89          cpu_percent: 50.0,
 90          memory_usage: 8 * 1024 * 1024 * 1024, // 8GB
 91          peer_count: 2,                        // Below default 3 minimum
 92          sync_progress: 0.95,
 93          block_height: 1000,
 94          uptime_secs: 3600,
 95          disk_io: Default::default(),
 96          network_io: Default::default(),
 97          timestamp: chrono::Utc::now(),
 98      };
 99  
100      let config = AlertConfig::default();
101      let alerts = acdc_monitor::check_alerts(&test_metrics, &config).await;
102  
103      println!();
104      println!("Test metrics:");
105      println!(
106          "  Disk usage: {:.1}% (threshold: {}%)",
107          test_metrics.disk_usage_percent, config.disk_threshold_percent
108      );
109      println!(
110          "  Peer count: {} (minimum: {})",
111          test_metrics.peer_count, config.peer_count_minimum
112      );
113      println!();
114  
115      if alerts.is_empty() {
116          println!("No alerts triggered (unexpected!)");
117      } else {
118          println!("Alerts triggered ({}):", alerts.len());
119          print!("{}", acdc_monitor::alerts::format_alerts(&alerts));
120      }
121  
122      Ok(())
123  }
124  
125  /// Generate Grafana dashboard.
126  pub async fn generate_dashboard(node_id: &str, output: Option<String>) -> Result<()> {
127      let dashboard = acdc_monitor::generate_dashboard(node_id);
128  
129      if let Some(path) = output {
130          std::fs::write(&path, &dashboard)?;
131          println!("Dashboard saved to: {}", path);
132      } else {
133          println!("{}", dashboard);
134      }
135  
136      Ok(())
137  }
138  
139  /// Run health check.
140  pub async fn health(node_id: &str, json: bool) -> Result<()> {
141      let status = acdc_monitor::health_check(node_id).await?;
142  
143      if json {
144          println!("{}", serde_json::to_string_pretty(&status)?);
145      } else {
146          print!("{}", acdc_monitor::health::format_health(&status));
147      }
148  
149      Ok(())
150  }