/ scripts / utils / echo.sh
echo.sh
  1  #!/bin/bash
  2  
  3  ###############################################################################
  4  # ECHO - System Status Monitor
  5  #
  6  # Checks the health and status of all ECHO agents and infrastructure
  7  ###############################################################################
  8  
  9  set -euo pipefail
 10  
 11  # Colors for output
 12  RED='\033[0;31m'
 13  GREEN='\033[0;32m'
 14  YELLOW='\033[1;33m'
 15  BLUE='\033[0;34m'
 16  CYAN='\033[0;36m'
 17  BOLD='\033[1m'
 18  NC='\033[0m' # No Color
 19  
 20  # Database configuration
 21  DB_HOST="${DB_HOST:-localhost}"
 22  DB_PORT="${DB_PORT:-5432}"
 23  DB_NAME="${DB_NAME:-echo_org_dev}"
 24  DB_USER="${DB_USER:-postgres}"
 25  
 26  # Redis configuration
 27  REDIS_HOST="${REDIS_HOST:-localhost}"
 28  REDIS_PORT="${REDIS_PORT:-6379}"
 29  
 30  # Agent roles
 31  AGENTS=(
 32    "ceo"
 33    "cto"
 34    "chro"
 35    "operations_head"
 36    "product_manager"
 37    "senior_architect"
 38    "uiux_engineer"
 39    "senior_developer"
 40    "test_lead"
 41  )
 42  
 43  ###############################################################################
 44  # Utility Functions
 45  ###############################################################################
 46  
 47  print_header() {
 48    echo -e "\n${BOLD}${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 49    echo -e "${BOLD}${CYAN}  $1${NC}"
 50    echo -e "${BOLD}${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
 51  }
 52  
 53  print_status() {
 54    local status=$1
 55    local message=$2
 56  
 57    case $status in
 58      "ok")
 59        echo -e "  ${GREEN}✓${NC} $message"
 60        ;;
 61      "warning")
 62        echo -e "  ${YELLOW}⚠${NC} $message"
 63        ;;
 64      "error")
 65        echo -e "  ${RED}✗${NC} $message"
 66        ;;
 67      "info")
 68        echo -e "  ${BLUE}ℹ${NC} $message"
 69        ;;
 70    esac
 71  }
 72  
 73  ###############################################################################
 74  # Infrastructure Checks
 75  ###############################################################################
 76  
 77  check_postgres() {
 78    print_header "PostgreSQL Status"
 79  
 80    if command -v psql &> /dev/null; then
 81      if PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c '\q' 2>/dev/null; then
 82        print_status "ok" "PostgreSQL is ${GREEN}RUNNING${NC} on $DB_HOST:$DB_PORT"
 83  
 84        # Get database size
 85        local db_size=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "SELECT pg_size_pretty(pg_database_size('$DB_NAME'));" 2>/dev/null | xargs)
 86        print_status "info" "Database size: $db_size"
 87  
 88        # Get table counts
 89        local tables=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -t -c "
 90          SELECT
 91            'decisions: ' || COUNT(*) || ' | ' ||
 92            'messages: ' || (SELECT COUNT(*) FROM messages) || ' | ' ||
 93            'workflows: ' || (SELECT COUNT(*) FROM workflow_executions)
 94          FROM decisions;
 95        " 2>/dev/null | xargs)
 96        print_status "info" "Records: $tables"
 97  
 98        return 0
 99      else
100        print_status "error" "PostgreSQL is ${RED}NOT ACCESSIBLE${NC}"
101        print_status "info" "Try: pg_ctl -D /usr/local/var/postgres start"
102        return 1
103      fi
104    else
105      print_status "warning" "psql not found in PATH"
106      return 1
107    fi
108  }
109  
# Report whether Redis answers a ping and, if so, its memory usage and the
# number of keys in the selected database.
# Globals:  REDIS_HOST, REDIS_PORT, RED, GREEN, NC
# Outputs:  status lines on stdout via print_header / print_status
# Returns:  0 when the ping succeeds; 1 when redis-cli is missing or the ping
#           fails
check_redis() {
  print_header "Redis Status"

  if ! command -v redis-cli &> /dev/null; then
    print_status "warning" "redis-cli not found in PATH"
    return 1
  fi

  local -a redis_cmd=(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT")

  if ! "${redis_cmd[@]}" ping &> /dev/null; then
    print_status "error" "Redis is ${RED}NOT ACCESSIBLE${NC}"
    print_status "info" "Try: redis-server --daemonize yes"
    return 1
  fi

  print_status "ok" "Redis is ${GREEN}RUNNING${NC} on $REDIS_HOST:$REDIS_PORT"

  # INFO lines look like "name:value" terminated by CRLF. Match the field name
  # exactly so similarly named fields cannot be picked up, and strip the CR.
  # `|| true` keeps whatever was captured even if a pipeline stage fails.
  local mem_used
  mem_used=$("${redis_cmd[@]}" INFO memory 2> /dev/null \
    | awk -F: '$1 == "used_memory_human" { sub(/\r$/, "", $2); print $2 }') || true
  print_status "info" "Memory used: ${mem_used:-unknown}"

  # DBSIZE replies with a bare integer (or "(integer) N"); there is no colon to
  # cut on, so simply keep the digits.
  local keys
  keys=$("${redis_cmd[@]}" DBSIZE 2> /dev/null) || true
  keys=${keys//[^0-9]/}
  print_status "info" "Keys: ${keys:-unknown}"

  return 0
}
136  
137  ###############################################################################
138  # Agent Health Checks
139  ###############################################################################
140  
# Print one row per agent found in the agent_status table, classified by the
# age of its last heartbeat, followed by a summary line.
#   age < 30s          -> HEALTHY
#   30s <= age < 60s   -> DEGRADED
#   age >= 60s or no usable heartbeat value -> DOWN
# Globals:  DB_HOST, DB_PORT, DB_USER, DB_NAME, DB_PASSWORD (optional), AGENTS,
#           RED, GREEN, YELLOW, BOLD, NC
# Outputs:  table and summary on stdout
# Returns:  0 when no agent is DOWN; 1 when psql is missing, the query fails,
#           the table has no rows, or at least one agent is DOWN
check_agent_health() {
  print_header "Agent Health Status"

  if ! command -v psql &> /dev/null; then
    print_status "error" "Cannot check agent health - psql not available"
    return 1
  fi

  # Query agent_status table
  local query="
      SELECT
        role,
        status,
        EXTRACT(EPOCH FROM (NOW() - last_heartbeat))::INTEGER as seconds_ago,
        version
      FROM agent_status
      ORDER BY role;
    "

  # Declaring first keeps psql's exit status visible, so a failed query is not
  # mistaken for "no heartbeats yet".
  local result
  if ! result=$(PGPASSWORD="${DB_PASSWORD:-postgres}" psql -h "$DB_HOST" -p "$DB_PORT" \
    -U "$DB_USER" -d "$DB_NAME" -t -A -F'|' -c "$query" 2> /dev/null); then
    print_status "error" "Could not query agent_status (is PostgreSQL reachable?)"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "warning" "No agents have sent heartbeats yet"
    echo ""
    print_status "info" "Expected agents:"
    local agent
    for agent in "${AGENTS[@]}"; do
      echo -e "    - $agent"
    done
    return 1
  fi

  local healthy=0 degraded=0 down=0
  # `status` is the table's own status column; it is parsed but not displayed.
  local role status seconds_ago version
  local age_display tint label padded_role gap

  echo -e "${BOLD}  Role                Status      Last Heartbeat    Version${NC}"
  echo -e "  ────────────────────────────────────────────────────────────────"

  while IFS='|' read -r role status seconds_ago version; do
    [[ -n "$role" ]] || continue

    if [[ ! "$seconds_ago" =~ ^-?[0-9]+$ ]]; then
      # NULL (or otherwise non-numeric) heartbeat: nothing to compare against.
      age_display="never"
      tint=$RED
      label="DOWN"
      down=$((down + 1))
    else
      # A heartbeat stamped slightly in the future (clock skew) counts as "now".
      if ((seconds_ago < 0)); then
        seconds_ago=0
      fi

      if ((seconds_ago < 60)); then
        age_display="${seconds_ago}s ago"
      else
        age_display="$((seconds_ago / 60))m ago"
      fi

      # Counters use x=$((x + 1)): `((x++))` returns status 1 when x is 0,
      # which aborts the whole script under `set -e`.
      if ((seconds_ago < 30)); then
        tint=$GREEN
        label="HEALTHY"
        healthy=$((healthy + 1))
      elif ((seconds_ago < 60)); then
        tint=$YELLOW
        label="DEGRADED"
        degraded=$((degraded + 1))
      else
        tint=$RED
        label="DOWN"
        down=$((down + 1))
      fi
    fi

    # Pad outside the colour codes so the columns line up: role to 18 columns,
    # status label to 12.
    printf -v padded_role '%-18s' "$role"
    printf -v gap '%*s' $((12 - ${#label})) ''
    echo -e "  ${tint}●${NC} ${padded_role} ${tint}${label}${NC}${gap}${age_display}    ${version:-N/A}"
  done <<< "$result"

  echo ""
  print_status "info" "Summary: ${GREEN}$healthy healthy${NC}, ${YELLOW}$degraded degraded${NC}, ${RED}$down down${NC}"

  if ((down > 0)); then
    return 1
  fi
  return 0
}
213  
214  ###############################################################################
215  # Workflow Status
216  ###############################################################################
217  
# Show how many workflow_executions rows exist per status, followed by the
# three most recently inserted workflows.
# Globals:  DB_HOST, DB_PORT, DB_USER, DB_NAME, DB_PASSWORD (optional),
#           CYAN, YELLOW, GREEN, RED, NC
# Outputs:  status lines on stdout
# Returns:  0 on success (including "no workflows yet"); 1 when psql is
#           missing or the per-status query fails
check_workflows() {
  print_header "Workflow Status"

  if ! command -v psql &> /dev/null; then
    print_status "error" "Cannot check workflows - psql not available"
    return 1
  fi

  local -a psql_cmd=(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
  local db_password="${DB_PASSWORD:-postgres}"

  # Count workflows by status. Statuses not named in the CASE sort last
  # (the CASE yields NULL for them).
  local query="
      SELECT
        status,
        COUNT(*)
      FROM workflow_executions
      GROUP BY status
      ORDER BY
        CASE status
          WHEN 'running' THEN 1
          WHEN 'paused' THEN 2
          WHEN 'completed' THEN 3
          WHEN 'failed' THEN 4
        END;
    "

  # Keep psql's exit status so a failed query is not reported as "no workflows".
  local result
  if ! result=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -F'|' -c "$query" 2> /dev/null); then
    print_status "error" "Could not query workflow_executions (is PostgreSQL reachable?)"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "info" "No workflows executed yet"
    return 0
  fi

  local status count
  while IFS='|' read -r status count; do
    case "$status" in
      running)
        print_status "info" "${CYAN}Running:${NC} $count workflow(s)"
        ;;
      paused)
        print_status "warning" "${YELLOW}Paused:${NC} $count workflow(s)"
        ;;
      completed)
        print_status "ok" "${GREEN}Completed:${NC} $count workflow(s)"
        ;;
      failed)
        print_status "error" "${RED}Failed:${NC} $count workflow(s)"
        ;;
      *)
        # Any other status value is still shown rather than silently dropped.
        print_status "info" "${status:-unknown}: $count workflow(s)"
        ;;
    esac
  done <<< "$result"

  # Show recent workflows (best effort: a failure here just omits the list).
  local recent
  recent=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -c "
      SELECT
        id,
        workflow_name,
        status,
        inserted_at
      FROM workflow_executions
      ORDER BY inserted_at DESC
      LIMIT 3;
    " 2> /dev/null) || recent=""

  if [[ -n "$recent" ]]; then
    echo ""
    print_status "info" "Recent workflows:"
    sed 's/^/    /' <<< "$recent"
  fi

  return 0
}
284  
285  ###############################################################################
286  # Message Queue Status
287  ###############################################################################
288  
# Summarise the messages table: unread messages per recipient role, the number
# of messages whose processing_error is set, and the total message count.
# Globals:  DB_HOST, DB_PORT, DB_USER, DB_NAME, DB_PASSWORD (optional),
#           RED, YELLOW, NC
# Outputs:  status lines on stdout
# Returns:  0 on success; 1 when psql is missing or the unread-messages query
#           fails
check_messages() {
  print_header "Message Queue Status"

  if ! command -v psql &> /dev/null; then
    print_status "error" "Cannot check messages - psql not available"
    return 1
  fi

  local -a psql_cmd=(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
  local db_password="${DB_PASSWORD:-postgres}"

  # Unread messages by recipient
  local query="
      SELECT
        to_role,
        COUNT(*) as unread
      FROM messages
      WHERE read = false
      GROUP BY to_role
      ORDER BY unread DESC;
    "

  # An empty result must mean "nothing unread", not "the query failed", so the
  # exit status of psql is checked explicitly.
  local result
  if ! result=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -F'|' -c "$query" 2> /dev/null); then
    print_status "error" "Could not query messages (is PostgreSQL reachable?)"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "ok" "All messages processed ✓"
  else
    print_status "warning" "Unread messages detected:"
    local to_role count
    while IFS='|' read -r to_role count; do
      echo -e "    ${YELLOW}→${NC} $to_role: $count unread"
    done <<< "$result"
  fi

  # Failed message processing. The count is matched against a pattern instead
  # of being handed to an integer test, so an empty or unexpected value no
  # longer raises "integer expression expected".
  local failed
  failed=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -c "
      SELECT COUNT(*) FROM messages WHERE processing_error IS NOT NULL;
    " 2> /dev/null) || failed=""

  if [[ "$failed" =~ ^[1-9][0-9]*$ ]]; then
    print_status "error" "${RED}$failed${NC} message(s) failed processing"
  fi

  # Total messages
  local total
  total=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -c "
      SELECT COUNT(*) FROM messages;
    " 2> /dev/null) || total=""

  echo ""
  print_status "info" "Total messages: ${total:-unknown}"
}
336  
337  ###############################################################################
338  # Decision Status
339  ###############################################################################
340  
# Show the number of rows in the decisions table per mode, plus a warning when
# decisions with status 'pending' exist.
# Globals:  DB_HOST, DB_PORT, DB_USER, DB_NAME, DB_PASSWORD (optional),
#           YELLOW, BOLD, NC
# Outputs:  table and status lines on stdout
# Returns:  0 on success (including "no decisions yet"); 1 when psql is
#           missing or the per-mode query fails
check_decisions() {
  print_header "Decision Status"

  if ! command -v psql &> /dev/null; then
    print_status "error" "Cannot check decisions - psql not available"
    return 1
  fi

  local -a psql_cmd=(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
  local db_password="${DB_PASSWORD:-postgres}"

  # Decisions by mode
  local query="
      SELECT
        mode,
        COUNT(*)
      FROM decisions
      GROUP BY mode
      ORDER BY COUNT(*) DESC;
    "

  # Keep psql's exit status so a failed query is not reported as "no decisions".
  local result
  if ! result=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -F'|' -c "$query" 2> /dev/null); then
    print_status "error" "Could not query decisions (is PostgreSQL reachable?)"
    return 1
  fi

  if [[ -z "$result" ]]; then
    print_status "info" "No decisions recorded yet"
    return 0
  fi

  echo -e "${BOLD}  Decision Mode       Count${NC}"
  echo -e "  ─────────────────────────────"
  local mode count
  while IFS='|' read -r mode count; do
    echo -e "  $(printf '%-18s' "$mode") $count"
  done <<< "$result"

  # Pending decisions. The count is matched against a pattern rather than
  # passed to an integer test, so an empty value cannot raise
  # "integer expression expected".
  local pending
  pending=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -c "
      SELECT COUNT(*) FROM decisions WHERE status = 'pending';
    " 2> /dev/null) || pending=""

  if [[ "$pending" =~ ^[1-9][0-9]*$ ]]; then
    echo ""
    print_status "warning" "${YELLOW}$pending${NC} decision(s) pending"
  fi

  return 0
}
382  
383  ###############################################################################
384  # System Summary
385  ###############################################################################
386  
# Print a compact overall verdict:
#   OPERATIONAL - PostgreSQL and Redis reachable and at least one agent healthy
#   DEGRADED    - PostgreSQL and Redis reachable but no healthy agent
#   DOWN        - PostgreSQL or Redis unreachable
# followed by per-service marks and, when PostgreSQL is reachable, the number
# of agents whose last heartbeat is less than 30 seconds old.
# Globals:  DB_HOST, DB_PORT, DB_USER, DB_NAME, DB_PASSWORD (optional),
#           REDIS_HOST, REDIS_PORT, AGENTS, RED, GREEN, YELLOW, BOLD, NC
# Outputs:  summary on stdout
# Returns:  0
print_summary() {
  print_header "System Summary"

  local postgres_ok=0
  local redis_ok=0
  local agents_ok=0

  local -a psql_cmd=(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME")
  local db_password="${DB_PASSWORD:-postgres}"

  # Check if services are up
  if command -v psql &> /dev/null \
    && PGPASSWORD="$db_password" "${psql_cmd[@]}" -c '\q' 2> /dev/null; then
    postgres_ok=1
  fi

  if command -v redis-cli &> /dev/null \
    && redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping &> /dev/null; then
    redis_ok=1
  fi

  # Count healthy agents. Anything that is not a plain non-negative integer
  # (failed query, empty output) is treated as 0 so the numeric comparisons
  # below always receive a valid operand.
  local healthy_count=0
  if ((postgres_ok)); then
    healthy_count=$(PGPASSWORD="$db_password" "${psql_cmd[@]}" -t -A -c "
        SELECT COUNT(*)
        FROM agent_status
        WHERE EXTRACT(EPOCH FROM (NOW() - last_heartbeat)) < 30;
      " 2> /dev/null) || healthy_count=0
    if [[ ! "$healthy_count" =~ ^[0-9]+$ ]]; then
      healthy_count=0
    fi
  fi

  if ((healthy_count > 0)); then
    agents_ok=1
  fi

  # Overall status
  if ((postgres_ok && redis_ok && agents_ok)); then
    echo -e "  ${GREEN}●${NC} System Status: ${BOLD}${GREEN}OPERATIONAL${NC}"
  elif ((postgres_ok && redis_ok)); then
    echo -e "  ${YELLOW}●${NC} System Status: ${BOLD}${YELLOW}DEGRADED${NC} (no healthy agents)"
  else
    echo -e "  ${RED}●${NC} System Status: ${BOLD}${RED}DOWN${NC}"
  fi

  echo ""
  echo -e "  ${BOLD}Infrastructure:${NC}"
  if ((postgres_ok)); then
    echo -e "    ${GREEN}✓${NC} PostgreSQL"
  else
    echo -e "    ${RED}✗${NC} PostgreSQL"
  fi
  if ((redis_ok)); then
    echo -e "    ${GREEN}✓${NC} Redis"
  else
    echo -e "    ${RED}✗${NC} Redis"
  fi

  if ((postgres_ok)); then
    local expected_agents=${#AGENTS[@]}
    local agent_mark
    # The mark reflects the count instead of always being a green check.
    if ((healthy_count == 0)); then
      agent_mark="${RED}✗${NC}"
    elif ((healthy_count < expected_agents)); then
      agent_mark="${YELLOW}⚠${NC}"
    else
      agent_mark="${GREEN}✓${NC}"
    fi

    echo ""
    echo -e "  ${BOLD}Agents:${NC}"
    echo -e "    ${agent_mark} $healthy_count / ${expected_agents} agents healthy"
  fi

  echo ""
}
439  
440  ###############################################################################
441  # Main Menu
442  ###############################################################################
443  
# Print the usage text on stdout.
# Globals:  BOLD, NC (colour sequences stored as literal backslash escapes)
# The text is rendered with printf '%b' because plain `cat` prints the colour
# variables verbatim ("\033[1m") instead of turning them into escape codes.
show_help() {
  local help_text
  help_text=$(
    cat << EOF
${BOLD}ECHO System Status Monitor${NC}

${BOLD}USAGE:${NC}
  ./echo.sh [COMMAND]

${BOLD}COMMANDS:${NC}
  status      Show full system status (default)
  agents      Show only agent health
  infra       Show only infrastructure (PostgreSQL, Redis)
  workflows   Show only workflow status
  messages    Show only message queue status
  decisions   Show only decision status
  summary     Show quick system summary
  help        Show this help message

${BOLD}ENVIRONMENT VARIABLES:${NC}
  DB_HOST     PostgreSQL host (default: localhost)
  DB_PORT     PostgreSQL port (default: 5432)
  DB_NAME     Database name (default: echo_org_dev)
  DB_USER     Database user (default: postgres)
  DB_PASSWORD Database password (default: postgres)
  REDIS_HOST  Redis host (default: localhost)
  REDIS_PORT  Redis port (default: 6379)

${BOLD}EXAMPLES:${NC}
  ./echo.sh                    # Show full status
  ./echo.sh agents             # Check only agents
  ./echo.sh summary            # Quick overview
  DB_HOST=prod.db ./echo.sh    # Check production database
EOF
  )

  # Command substitution strips the trailing newlines; print the final line
  # break and the closing blank line explicitly.
  printf '%b\n\n' "$help_text"
}
478  
479  ###############################################################################
480  # Main
481  ###############################################################################
482  
# Dispatch on the first command-line argument (default: "status").
#   $1 - status | agents | infra | workflows | messages | decisions |
#        summary | help | --help | -h
# Returns: 0 when every check that ran returned 0, 1 when at least one did
#          not. An unknown command prints an error plus the usage text on
#          stderr and exits with status 1.
#
# Every check is invoked as `check || rc=1`. Under `set -e` a bare call would
# terminate the script at the first check that returns non-zero, so a
# PostgreSQL outage would hide the Redis section and the summary. Running the
# checks inside an `||` list lets all of them report and only records the
# failure. (errexit is not applied inside a function called this way, so a
# check never aborts the run half-way through its own output either.)
main() {
  local command="${1:-status}"
  local rc=0

  case "$command" in
    status)
      echo -e "${BOLD}${CYAN}"
      echo "  ███████╗ ██████╗██╗  ██╗ ██████╗ "
      echo "  ██╔════╝██╔════╝██║  ██║██╔═══██╗"
      echo "  █████╗  ██║     ███████║██║   ██║"
      echo "  ██╔══╝  ██║     ██╔══██║██║   ██║"
      echo "  ███████╗╚██████╗██║  ██║╚██████╔╝"
      echo "  ╚══════╝ ╚═════╝╚═╝  ╚═╝ ╚═════╝ "
      echo -e "${NC}"
      echo -e "  ${BOLD}Executive Coordination & Hierarchical Organization${NC}"
      echo ""

      check_postgres || rc=1
      check_redis || rc=1
      check_agent_health || rc=1
      check_workflows || rc=1
      check_messages || rc=1
      check_decisions || rc=1
      print_summary || rc=1
      ;;

    agents)
      check_agent_health || rc=1
      ;;

    infra)
      check_postgres || rc=1
      check_redis || rc=1
      ;;

    workflows)
      check_workflows || rc=1
      ;;

    messages)
      check_messages || rc=1
      ;;

    decisions)
      check_decisions || rc=1
      ;;

    summary)
      print_summary || rc=1
      ;;

    help | --help | -h)
      show_help
      ;;

    *)
      # Diagnostics belong on stderr so stdout stays clean for real output.
      {
        echo -e "${RED}Error: Unknown command '$command'${NC}"
        echo ""
        show_help
      } >&2
      exit 1
      ;;
  esac

  return "$rc"
}
545  
546  # Run main function
547  main "$@"