Cradicle Explorer

/ scripts / testing / test_self_selection.sh
test_self_selection.sh
  1  #!/bin/bash
  2  
  3  # Test Agent Self-Selection Feature
  4  # Demonstrates dynamic participation evaluation
  5  
  6  set -e
  7  
  8  # Colors
  9  GREEN='\033[0;32m'
 10  BLUE='\033[0;34m'
 11  YELLOW='\033[1;33m'
 12  RED='\033[0;31m'
 13  NC='\033[0m'
 14  
 15  REDIS_PORT=6383
 16  DB_PORT=5433
 17  DB_USER=echo_org
 18  DB_NAME=echo_org
 19  DB_PASSWORD=postgres
 20  
 21  echo -e "${BLUE}╔═══════════════════════════════════════════════════════════════════╗${NC}"
 22  echo -e "${BLUE}║           Agent Self-Selection Test                               ║${NC}"
 23  echo -e "${BLUE}║   Testing dynamic participation with LLM evaluation               ║${NC}"
 24  echo -e "${BLUE}╚═══════════════════════════════════════════════════════════════════╝${NC}"
 25  echo ""
 26  
 27  # Check infrastructure
 28  echo -e "${BLUE}Checking infrastructure...${NC}"
 29  
 30  if ! PGPASSWORD="$DB_PASSWORD" psql -h localhost -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -c "SELECT 1" > /dev/null 2>&1; then
 31      echo -e "${RED}✗ PostgreSQL not accessible${NC}"
 32      exit 1
 33  fi
 34  echo -e "${GREEN}✓ PostgreSQL${NC}"
 35  
 36  if ! redis-cli -p "$REDIS_PORT" ping > /dev/null 2>&1; then
 37      echo -e "${RED}✗ Redis not running${NC}"
 38      exit 1
 39  fi
 40  echo -e "${GREEN}✓ Redis${NC}"
 41  
 42  if ! ollama list > /dev/null 2>&1; then
 43      echo -e "${YELLOW}⚠ Ollama not running (LLM features disabled)${NC}"
 44  fi
 45  
 46  # Start only CTO agent for focused test
 47  echo ""
 48  echo -e "${BLUE}Starting CTO agent...${NC}"
 49  
 50  cd apps/cto
 51  tail -f /dev/null | REDIS_PORT="$REDIS_PORT" REDIS_HOST=localhost DB_PORT="$DB_PORT" DB_USER="$DB_USER" DB_NAME="$DB_NAME" DB_PASSWORD="$DB_PASSWORD" ./cto > /tmp/cto_selfselect_test.log 2>&1 &
 52  CTO_PID=$!
 53  
 54  cd ../..
 55  
 56  echo -e "${GREEN}✓ CTO running (PID: $CTO_PID)${NC}"
 57  
 58  # Wait for startup
 59  echo -e "${BLUE}Waiting for agent to initialize...${NC}"
 60  sleep 3
 61  
 62  # Send broadcast messages
 63  echo ""
 64  echo -e "${YELLOW}Test 1: Technical task (CTO should participate)${NC}"
 65  
 66  redis-cli -p "$REDIS_PORT" PUBLISH "messages:all" "$(jq -n \
 67      --arg id "msg_test1_$(date +%s)" \
 68      --arg from "ceo" \
 69      --arg subject "Database performance optimization needed" \
 70      --arg content "We're experiencing slow queries in production. Need technical leadership to investigate and resolve." \
 71      '{
 72          id: $id,
 73          from: $from,
 74          to: "all",
 75          type: "task_broadcast",
 76          subject: $subject,
 77          content: $content,
 78          metadata: {
 79              timestamp: now | todate,
 80              priority: "high"
 81          }
 82      }')" > /dev/null
 83  
 84  echo -e "  Broadcast: Database performance optimization"
 85  sleep 2
 86  
 87  echo ""
 88  echo -e "${YELLOW}Test 2: HR task (CTO should decline)${NC}"
 89  
 90  redis-cli -p "$REDIS_PORT" PUBLISH "messages:all" "$(jq -n \
 91      --arg id "msg_test2_$(date +%s)" \
 92      --arg from "ceo" \
 93      --arg subject "Hiring new HR manager" \
 94      --arg content "We need to recruit a new HR manager for the Boston office. Looking for candidates with 5+ years experience in HR management." \
 95      '{
 96          id: $id,
 97          from: $from,
 98          to: "all",
 99          type: "task_broadcast",
100          subject: $subject,
101          content: $content,
102          metadata: {
103              timestamp: now | todate,
104              priority: "normal"
105          }
106      }')" > /dev/null
107  
108  echo -e "  Broadcast: Hiring HR manager"
109  sleep 2
110  
111  echo ""
112  echo -e "${YELLOW}Test 3: Mixed relevance (CTO should evaluate with LLM)${NC}"
113  
114  redis-cli -p "$REDIS_PORT" PUBLISH "messages:all" "$(jq -n \
115      --arg id "msg_test3_$(date +%s)" \
116      --arg from "ceo" \
117      --arg subject "How can AI agents develop curiosity?" \
118      --arg content "Research question: Can we implement genuine curiosity in AI agents? This requires technical innovation but also understanding of learning psychology." \
119      '{
120          id: $id,
121          from: $from,
122          to: "all",
123          type: "task_broadcast",
124          subject: $subject,
125          content: $content,
126          metadata: {
127              timestamp: now | todate,
128              priority: "normal"
129          }
130      }')" > /dev/null
131  
132  echo -e "  Broadcast: AI curiosity research"
133  sleep 3
134  
# Show results
echo ""
echo -e "${BLUE}═══════════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE}Results:${NC}"
echo ""

#######################################
# Print the last 20 participation-decision lines found in an agent log, or a
# hint when there are none (which includes a missing or unreadable log).
# Arguments: $1 - path of the agent log file
# Outputs:   the matching lines, or the hint, on stdout
# Returns:   0
#######################################
show_participation_decisions() {
    local log_file=$1
    local decisions

    # Without pipefail the status of `grep | tail` is that of `tail`, which
    # succeeds on empty input, so a `|| echo ...` fallback chained after the
    # pipeline never runs. Capture the output and test it instead; `|| true`
    # keeps the no-match case non-fatal when pipefail is enabled.
    decisions=$(grep -E "Fast-path:|LLM decided:|participating|declining" "$log_file" 2>/dev/null | tail -20) || true

    if [[ -n "$decisions" ]]; then
        printf '%s\n' "$decisions"
    else
        echo "  (No participation logs yet - check $log_file)"
    fi
}

# Extract participation decisions from log
echo -e "${GREEN}CTO Participation Decisions:${NC}"
echo ""

show_participation_decisions /tmp/cto_selfselect_test.log

echo ""
echo -e "${BLUE}Full log available at: /tmp/cto_selfselect_test.log${NC}"
149  
# Cleanup
echo ""
echo -e "${BLUE}Stopping CTO agent...${NC}"
# The PID is quoted and the step is skipped when it is empty: an unquoted
# empty value would turn `wait` into "wait for every background job", which
# can block forever on a child that never exits.
if [[ -n "${CTO_PID:-}" ]]; then
    # Best effort: the agent may already have exited, so failures are ignored.
    kill "$CTO_PID" 2>/dev/null || true
    wait "$CTO_PID" 2>/dev/null || true
fi
echo -e "${GREEN}✓ Test complete${NC}"

# Reference for reading the results printed earlier; nothing is asserted here.
echo ""
echo -e "${YELLOW}Expected Behavior:${NC}"
echo -e "  Test 1: ${GREEN}CTO should PARTICIPATE${NC} (high keyword relevance + technical)"
echo -e "  Test 2: ${RED}CTO should DECLINE${NC} (HR-related, not technical)"
echo -e "  Test 3: ${BLUE}CTO should DEFER to LLM${NC} (ambiguous relevance)"
161  echo -e "  Test 3: ${BLUE}CTO should DEFER to LLM${NC} (ambiguous relevance)"