#!/bin/bash

###############################################################################
# echo.sh - ECHO System Status Monitor
#
# Checks the health and status of all ECHO agents and infrastructure.
#
# Optional environment variables:
#   DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASSWORD   PostgreSQL connection
#   REDIS_HOST, REDIS_PORT                            Redis connection
###############################################################################

set -euo pipefail

# Colors for output. ANSI-C quoting stores the real escape byte, so the
# values render the same through echo -e, printf and here-documents.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly BLUE=$'\033[0;34m'
readonly CYAN=$'\033[0;36m'
readonly BOLD=$'\033[1m'
readonly NC=$'\033[0m' # No Color

# Database configuration
DB_HOST="${DB_HOST:-localhost}"
DB_PORT="${DB_PORT:-5432}"
DB_NAME="${DB_NAME:-echo_org_dev}"
DB_USER="${DB_USER:-postgres}"

# Redis configuration
REDIS_HOST="${REDIS_HOST:-localhost}"
REDIS_PORT="${REDIS_PORT:-6379}"

# Agent roles
readonly -a AGENTS=(
  "ceo"
  "cto"
  "chro"
  "operations_head"
  "product_manager"
  "senior_architect"
  "uiux_engineer"
  "senior_developer"
  "test_lead"
)

###############################################################################
# Utility Functions
###############################################################################

#######################################
# Print a section banner.
# Arguments: $1 - section title
# Outputs:   three decorated lines on stdout, surrounded by blank lines
#######################################
print_header() {
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  printf '\n%s%s%s%s\n' "$BOLD" "$BLUE" "$rule" "$NC"
  printf '%s%s  %s%s\n' "$BOLD" "$CYAN" "${1:-}" "$NC"
  printf '%s%s%s%s\n\n' "$BOLD" "$BLUE" "$rule" "$NC"
}

#######################################
# Print one status line prefixed with a colored glyph.
# Arguments: $1 - ok | warning | error | info (anything else gets a neutral
#                 bullet instead of being silently dropped)
#            $2 - message; backslash escapes in it are interpreted (%b)
# Outputs:   one line on stdout
#######################################
print_status() {
  local status=${1:-}
  local message=${2:-}
  local color glyph

  case "$status" in
    ok)
      color=$GREEN
      glyph='✓'
      ;;
    warning)
      color=$YELLOW
      glyph='⚠'
      ;;
    error)
      color=$RED
      glyph='✗'
      ;;
    info)
      color=$BLUE
      glyph='ℹ'
      ;;
    *)
      color=$NC
      glyph='•'
      ;;
  esac

  printf '  %s%s%s %b\n' "$color" "$glyph" "$NC" "$message"
}

#######################################
# Run psql against the configured database with stderr silenced.
# Globals:   DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASSWORD (read)
# Arguments: extra psql arguments (e.g. -t -A -c "SQL")
# Outputs:   psql's stdout
# Returns:   psql's exit status
#######################################
_db_query() {
  PGPASSWORD="${DB_PASSWORD:-postgres}" psql \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    "$@" 2>/dev/null
}

#######################################
# Run a query and emit its rows unaligned, '|'-separated, without headers.
# Arguments: $1 - SQL text
# Returns:   psql's exit status
#######################################
_db_rows() {
  _db_query -t -A -F'|' -c "$1"
}

#######################################
# Run a query expected to yield a single non-negative integer.
# Arguments: $1 - SQL text
# Outputs:   the integer on stdout
# Returns:   non-zero if psql fails or the output is not an integer
#######################################
_db_count() {
  local value
  value=$(_db_query -t -A -c "$1") || return 1
  [[ "$value" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$value"
}

###############################################################################
# Infrastructure Checks
###############################################################################

#######################################
# Report whether PostgreSQL is reachable, plus database size and row counts.
# Returns: 0 if a connection succeeds; 1 if psql is missing or cannot connect
#######################################
check_postgres() {
  print_header "PostgreSQL Status"

  if ! command -v psql &>/dev/null; then
    print_status "warning" "psql not found in PATH"
    return 1
  fi

  if ! _db_query -c '\q'; then
    print_status "error" "PostgreSQL is ${RED}NOT ACCESSIBLE${NC}"
    print_status "info" "Try: pg_ctl -D /usr/local/var/postgres start"
    return 1
  fi

  print_status "ok" "PostgreSQL is ${GREEN}RUNNING${NC} on $DB_HOST:$DB_PORT"

  # Get database size. current_database() is the database we connected to
  # (-d "$DB_NAME"), which avoids splicing $DB_NAME into the SQL text.
  local db_size
  db_size=$(_db_query -t -A -c \
    "SELECT pg_size_pretty(pg_database_size(current_database()));") || db_size=""
  print_status "info" "Database size: ${db_size:-unknown}"

  # Get table counts (best effort: the tables may not exist yet)
  local tables
  tables=$(_db_query -t -A -c "
    SELECT
      'decisions: ' || COUNT(*) || ' | ' ||
      'messages: ' || (SELECT COUNT(*) FROM messages) || ' | ' ||
      'workflows: ' || (SELECT COUNT(*) FROM workflow_executions)
    FROM decisions;
  ") || tables=""
  print_status "info" "Records: ${tables:-unavailable}"

  return 0
}

#######################################
# Report whether Redis answers PING, plus memory usage and key count.
# Returns: 0 if PING succeeds; 1 if redis-cli is missing or PING fails
#######################################
check_redis() {
  print_header "Redis Status"

  if ! command -v redis-cli &>/dev/null; then
    print_status "warning" "redis-cli not found in PATH"
    return 1
  fi

  if ! redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping &>/dev/null; then
    print_status "error" "Redis is ${RED}NOT ACCESSIBLE${NC}"
    print_status "info" "Try: redis-server --daemonize yes"
    return 1
  fi

  print_status "ok" "Redis is ${GREEN}RUNNING${NC} on $REDIS_HOST:$REDIS_PORT"

  # Get memory usage (INFO lines end in CR, which is stripped)
  local mem_used
  mem_used=$(
    redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" INFO memory 2>/dev/null \
      | awk -F: '$1 == "used_memory_human" { sub(/\r$/, "", $2); print $2 }'
  ) || mem_used=""
  print_status "info" "Memory used: ${mem_used:-unknown}"

  # Get number of keys; keep digits only so any decoration is dropped
  local keys
  keys=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" DBSIZE 2>/dev/null) || keys=""
  keys=${keys//[!0-9]/}
  print_status "info" "Keys: ${keys:-unknown}"

  return 0
}

###############################################################################
# Agent Health Checks
###############################################################################

#######################################
# List every row of agent_status, classified by heartbeat age:
#   < 30s HEALTHY, < 60s DEGRADED, otherwise (or no heartbeat value) DOWN.
# Returns: 0 if no agent is DOWN; 1 if psql is missing, the query fails,
#          there are no rows, or at least one agent is DOWN
#######################################
check_agent_health() {
  print_header "Agent Health Status"

  if ! command -v psql &>/dev/null; then
    print_status "error" "Cannot check agent health - psql not available"
    return 1
  fi

  # Query agent_status table
  local query="
    SELECT
      role,
      status,
      EXTRACT(EPOCH FROM (NOW() - last_heartbeat))::INTEGER AS seconds_ago,
      version
    FROM agent_status
    ORDER BY role;
  "

  local result
  if ! result=$(_db_rows "$query"); then
    print_status "error" "Could not query agent_status table"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "warning" "No agents have sent heartbeats yet"
    echo ""
    print_status "info" "Expected agents:"
    local agent
    for agent in "${AGENTS[@]}"; do
      printf '    - %s\n' "$agent"
    done
    return 1
  fi

  local healthy=0
  local degraded=0
  local down=0

  printf '%s    %-18s %-10s %-16s %s%s\n' \
    "$BOLD" "Role" "Status" "Last Heartbeat" "Version" "$NC"
  echo "  ────────────────────────────────────────────────────────────────"

  local role _status seconds_ago version
  local age_display label color
  while IFS='|' read -r role _status seconds_ago version; do
    [[ -n "$role" ]] || continue

    if [[ ! "$seconds_ago" =~ ^-?[0-9]+$ ]]; then
      # NULL last_heartbeat comes back as an empty field: treat as DOWN
      age_display="never"
      label="DOWN"
      color=$RED
      down=$((down + 1))
    else
      if ((seconds_ago < 60)); then
        age_display="${seconds_ago}s ago"
      else
        age_display="$((seconds_ago / 60))m ago"
      fi

      # Counters use var=$((var + 1)): a bare ((var++)) returns status 1
      # when var is 0, which would abort the script under set -e.
      if ((seconds_ago < 30)); then
        label="HEALTHY"
        color=$GREEN
        healthy=$((healthy + 1))
      elif ((seconds_ago < 60)); then
        label="DEGRADED"
        color=$YELLOW
        degraded=$((degraded + 1))
      else
        label="DOWN"
        color=$RED
        down=$((down + 1))
      fi
    fi

    printf '  %s●%s %-18s %s%-10s%s %-16s %s\n' \
      "$color" "$NC" "$role" "$color" "$label" "$NC" \
      "$age_display" "${version:-N/A}"
  done <<<"$result"

  echo ""
  print_status "info" "Summary: ${GREEN}$healthy healthy${NC}, ${YELLOW}$degraded degraded${NC}, ${RED}$down down${NC}"

  if ((down > 0)); then
    return 1
  fi
  return 0
}

###############################################################################
# Workflow Status
###############################################################################

#######################################
# Show workflow_executions counts per status and the three newest rows.
# Returns: 0 on success (including "no workflows"); 1 if psql is missing
#          or the count query fails
#######################################
check_workflows() {
  print_header "Workflow Status"

  if ! command -v psql &>/dev/null; then
    print_status "error" "Cannot check workflows - psql not available"
    return 1
  fi

  # Count workflows by status
  local query="
    SELECT
      status,
      COUNT(*)
    FROM workflow_executions
    GROUP BY status
    ORDER BY
      CASE status
        WHEN 'running' THEN 1
        WHEN 'paused' THEN 2
        WHEN 'completed' THEN 3
        WHEN 'failed' THEN 4
      END;
  "

  local result
  if ! result=$(_db_rows "$query"); then
    print_status "error" "Could not query workflow_executions table"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "info" "No workflows executed yet"
    return 0
  fi

  local status count
  while IFS='|' read -r status count; do
    case "$status" in
      running)
        print_status "info" "${CYAN}Running:${NC} $count workflow(s)"
        ;;
      paused)
        print_status "warning" "${YELLOW}Paused:${NC} $count workflow(s)"
        ;;
      completed)
        print_status "ok" "${GREEN}Completed:${NC} $count workflow(s)"
        ;;
      failed)
        print_status "error" "${RED}Failed:${NC} $count workflow(s)"
        ;;
      *)
        # Statuses outside the four known ones are still reported
        print_status "info" "${status:-unknown}: $count workflow(s)"
        ;;
    esac
  done <<<"$result"

  # Show recent workflows (best effort)
  local recent
  recent=$(_db_query -t -c "
    SELECT
      id,
      workflow_name,
      status,
      inserted_at
    FROM workflow_executions
    ORDER BY inserted_at DESC
    LIMIT 3;
  ") || recent=""

  if [[ -n "$recent" ]]; then
    echo ""
    print_status "info" "Recent workflows:"
    sed 's/^/    /' <<<"$recent"
  fi

  return 0
}

###############################################################################
# Message Queue Status
###############################################################################

#######################################
# Show unread message counts per recipient, the number of messages with a
# processing error, and the total message count.
# Returns: 0 on success; 1 if psql is missing or the unread query fails
#######################################
check_messages() {
  print_header "Message Queue Status"

  if ! command -v psql &>/dev/null; then
    print_status "error" "Cannot check messages - psql not available"
    return 1
  fi

  # Unread messages by recipient
  local query="
    SELECT
      to_role,
      COUNT(*) as unread
    FROM messages
    WHERE read = false
    GROUP BY to_role
    ORDER BY unread DESC;
  "

  local result
  if ! result=$(_db_rows "$query"); then
    print_status "error" "Could not query messages table"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "ok" "All messages processed ✓"
  else
    print_status "warning" "Unread messages detected:"
    local to_role count
    while IFS='|' read -r to_role count; do
      printf '    %s→%s %s: %s unread\n' "$YELLOW" "$NC" "$to_role" "$count"
    done <<<"$result"
  fi

  # Failed message processing. An empty value means the count could not be
  # read; it is skipped rather than fed to a numeric comparison.
  local failed
  failed=$(_db_count \
    "SELECT COUNT(*) FROM messages WHERE processing_error IS NOT NULL;") || failed=""

  if [[ -n "$failed" ]] && ((failed > 0)); then
    print_status "error" "${RED}$failed${NC} message(s) failed processing"
  fi

  # Total messages
  local total
  total=$(_db_count "SELECT COUNT(*) FROM messages;") || total=""

  echo ""
  print_status "info" "Total messages: ${total:-unknown}"

  return 0
}

###############################################################################
# Decision Status
###############################################################################

#######################################
# Show decision counts per mode and the number of pending decisions.
# Returns: 0 on success (including "no decisions"); 1 if psql is missing
#          or the per-mode query fails
#######################################
check_decisions() {
  print_header "Decision Status"

  if ! command -v psql &>/dev/null; then
    print_status "error" "Cannot check decisions - psql not available"
    return 1
  fi

  # Decisions by mode
  local query="
    SELECT
      mode,
      COUNT(*)
    FROM decisions
    GROUP BY mode
    ORDER BY COUNT(*) DESC;
  "

  local result
  if ! result=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -t -A -F'|' -c "$query" 2>/dev/null); then
    print_status "error" "Could not query decisions table"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "info" "No decisions recorded yet"
    return 0
  fi

  # %b expands the color variables whether they hold literal "\033..." text
  # or a real escape byte.
  printf '%b  %-18s %s%b\n' "$BOLD" "Decision Mode" "Count" "$NC"
  echo "  ─────────────────────────────"
  local mode count
  while IFS='|' read -r mode count; do
    printf '  %-18s %s\n' "$mode" "$count"
  done <<<"$result"

  # Pending decisions. The value is validated before the numeric comparison
  # so a failed query cannot produce an "integer expression expected" error.
  local pending
  pending=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -t -A -c "SELECT COUNT(*) FROM decisions WHERE status = 'pending';" \
    2>/dev/null) || pending=""

  if [[ "$pending" =~ ^[0-9]+$ ]] && ((pending > 0)); then
    echo ""
    print_status "warning" "${YELLOW}$pending${NC} decision(s) pending"
  fi

  return 0
}

###############################################################################
# System Summary
###############################################################################

#######################################
# Print a one-screen verdict:
#   OPERATIONAL - PostgreSQL and Redis reachable, >= 1 agent heartbeat < 30s
#   DEGRADED    - PostgreSQL and Redis reachable, no recent heartbeat
#   DOWN        - PostgreSQL or Redis unreachable
# Returns: 0 always (this is a report, not a check)
#######################################
print_summary() {
  print_header "System Summary"

  local postgres_ok=0
  local redis_ok=0
  local agents_ok=0

  # Check if services are up
  if PGPASSWORD="${DB_PASSWORD:-postgres}" psql \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -c '\q' 2>/dev/null; then
    postgres_ok=1
  fi

  if command -v redis-cli &>/dev/null \
    && redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping &>/dev/null; then
    redis_ok=1
  fi

  # Count healthy agents; anything that is not a plain integer counts as 0
  local healthy_count=0
  if ((postgres_ok)); then
    healthy_count=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql \
      -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -A -c "
        SELECT COUNT(*)
        FROM agent_status
        WHERE EXTRACT(EPOCH FROM (NOW() - last_heartbeat)) < 30;
      " 2>/dev/null) || healthy_count=0
    [[ "$healthy_count" =~ ^[0-9]+$ ]] || healthy_count=0
  fi

  if ((healthy_count > 0)); then
    agents_ok=1
  fi

  # Overall status
  if ((postgres_ok && redis_ok && agents_ok)); then
    echo -e "  ${GREEN}●${NC} System Status: ${BOLD}${GREEN}OPERATIONAL${NC}"
  elif ((postgres_ok && redis_ok)); then
    echo -e "  ${YELLOW}●${NC} System Status: ${BOLD}${YELLOW}DEGRADED${NC} (no healthy agents)"
  else
    echo -e "  ${RED}●${NC} System Status: ${BOLD}${RED}DOWN${NC}"
  fi

  echo ""
  echo -e "  ${BOLD}Infrastructure:${NC}"
  if ((postgres_ok)); then
    echo -e "    ${GREEN}✓${NC} PostgreSQL"
  else
    echo -e "    ${RED}✗${NC} PostgreSQL"
  fi
  if ((redis_ok)); then
    echo -e "    ${GREEN}✓${NC} Redis"
  else
    echo -e "    ${RED}✗${NC} Redis"
  fi

  if ((postgres_ok)); then
    echo ""
    echo -e "  ${BOLD}Agents:${NC}"
    if ((agents_ok)); then
      echo -e "    ${GREEN}✓${NC} $healthy_count / ${#AGENTS[@]} agents healthy"
    else
      echo -e "    ${RED}✗${NC} $healthy_count / ${#AGENTS[@]} agents healthy"
    fi
  fi

  echo ""
  return 0
}

###############################################################################
# Main Menu
###############################################################################

#######################################
# Print usage, commands, environment variables and examples.
# Outputs: help text on stdout
#######################################
show_help() {
  # A here-document is not processed by echo -e, so the color variables are
  # expanded to real escape bytes first.
  local bold nc
  bold=$(printf '%b' "${BOLD:-}")
  nc=$(printf '%b' "${NC:-}")

  cat <<EOF
${bold}ECHO System Status Monitor${nc}

${bold}USAGE:${nc}
    ./echo.sh [COMMAND]

${bold}COMMANDS:${nc}
    status      Show full system status (default)
    agents      Show only agent health
    infra       Show only infrastructure (PostgreSQL, Redis)
    workflows   Show only workflow status
    messages    Show only message queue status
    decisions   Show only decision status
    summary     Show quick system summary
    help        Show this help message

${bold}ENVIRONMENT VARIABLES:${nc}
    DB_HOST       PostgreSQL host (default: localhost)
    DB_PORT       PostgreSQL port (default: 5432)
    DB_NAME       Database name (default: echo_org_dev)
    DB_USER       Database user (default: postgres)
    DB_PASSWORD   Database password (default: postgres)
    REDIS_HOST    Redis host (default: localhost)
    REDIS_PORT    Redis port (default: 6379)

${bold}EXAMPLES:${nc}
    ./echo.sh                    # Show full status
    ./echo.sh agents             # Check only agents
    ./echo.sh summary            # Quick overview
    DB_HOST=prod.db ./echo.sh    # Check production database

EOF
}

###############################################################################
# Main
###############################################################################

#######################################
# Dispatch on the sub-command.
# Arguments: $1 - status (default) | agents | infra | workflows | messages |
#                 decisions | summary | help | --help | -h
# Returns:   0 if every check that ran succeeded, 1 otherwise; exits 1 on an
#            unknown command
#######################################
main() {
  local subcommand="${1:-status}"
  local rc=0

  # Each check is invoked as "check || rc=1": a failing check must not abort
  # the run under set -e, so later checks and the summary still execute.
  case "$subcommand" in
    status)
      echo -e "${BOLD}${CYAN}"
      echo "  ███████╗ ██████╗██╗  ██╗ ██████╗ "
      echo "  ██╔════╝██╔════╝██║  ██║██╔═══██╗"
      echo "  █████╗  ██║     ███████║██║   ██║"
      echo "  ██╔══╝  ██║     ██╔══██║██║   ██║"
      echo "  ███████╗╚██████╗██║  ██║╚██████╔╝"
      echo "  ╚══════╝ ╚═════╝╚═╝  ╚═╝ ╚═════╝ "
      echo -e "${NC}"
      echo -e "  ${BOLD}Executive Coordination & Hierarchical Organization${NC}"
      echo ""

      check_postgres || rc=1
      check_redis || rc=1
      check_agent_health || rc=1
      check_workflows || rc=1
      check_messages || rc=1
      check_decisions || rc=1
      print_summary || rc=1
      ;;

    agents)
      check_agent_health || rc=1
      ;;

    infra)
      check_postgres || rc=1
      check_redis || rc=1
      ;;

    workflows)
      check_workflows || rc=1
      ;;

    messages)
      check_messages || rc=1
      ;;

    decisions)
      check_decisions || rc=1
      ;;

    summary)
      print_summary || rc=1
      ;;

    help | --help | -h)
      show_help
      ;;

    *)
      echo -e "${RED}Error: Unknown command '$subcommand'${NC}" >&2
      echo "" >&2
      show_help >&2
      exit 1
      ;;
  esac

  return "$rc"
}

# Run main function
main "$@"