/ tests / scripts / orchestrator-score-model.test.sh
orchestrator-score-model.test.sh
 1  #!/bin/sh
 2  # Tests for claude-orchestrator.sh — score_semantic model selection
 3  #
 4  # Validates that score_semantic batches use 'sonnet' (upgraded from 'haiku').
 5  
 6  set -e
 7  
 8  PASS=0
 9  FAIL=0
10  SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
11  PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
12  ORCHESTRATOR="$PROJECT_ROOT/scripts/claude-orchestrator.sh"
13  
# Record one successful check and report it on stdout.
# Globals:   PASS (incremented by one)
# Arguments: $1 - description of the check that succeeded
# Outputs:   "  PASS: <description>" followed by a newline
pass() {
  PASS=$((PASS + 1))
  # printf instead of echo: the description is printed verbatim, whereas
  # some /bin/sh implementations make echo interpret backslash sequences.
  printf '  PASS: %s\n' "$1"
}
18  
# Record one failed check and report it on stdout.
# Globals:   FAIL (incremented by one)
# Arguments: $1 - description of the check that failed
# Outputs:   "  FAIL: <description>" followed by a newline
fail() {
  FAIL=$((FAIL + 1))
  # printf instead of echo: failure descriptions may embed text copied from
  # the file under test, and some /bin/sh implementations make echo
  # interpret backslash sequences found in its arguments.
  printf '  FAIL: %s\n' "$1"
}
23  
echo "Test 1: score_semantic uses sonnet model"
# Find the first real (non-comment) `run_batch score_semantic <model>` call.
# Expected pattern: run_batch score_semantic sonnet
#  - whole-line comments are dropped first so a note that merely mentions
#    the call cannot be mistaken for the call itself;
#  - [[:space:]] replaces `\s`, which is not part of POSIX ERE;
#  - the assertion uses the same whitespace-tolerant pattern as the search,
#    so tabs or aligned columns between the words do not cause a false FAIL.
# `match` is also read by Test 2.
match=$(grep -v '^[[:space:]]*#' "$ORCHESTRATOR" \
  | grep -E 'run_batch[[:space:]]+score_semantic[[:space:]]+' \
  | head -n 1)
if printf '%s\n' "$match" \
  | grep -Eq 'run_batch[[:space:]]+score_semantic[[:space:]]+sonnet'; then
  pass "score_semantic calls run_batch with 'sonnet' model"
else
  fail "score_semantic should use 'sonnet' model, found: ${match:-<no run_batch score_semantic call found>}"
fi
33  
echo "Test 2: score_semantic does NOT use haiku"
# `match` holds the run_batch score_semantic line located by Test 1.
# An empty value means no call was found at all; that must count as a
# failure, otherwise "does not use haiku" would pass without checking
# anything.
case "$match" in
  '')
    fail "Could not find run_batch call for score_semantic"
    ;;
  *haiku*)
    fail "score_semantic should not use 'haiku' (was upgraded to sonnet)"
    ;;
  *)
    pass "score_semantic does not use haiku"
    ;;
esac
40  
echo "Test 3: Comment documents the upgrade from haiku"
# Check that there's a comment near score_semantic explaining the model change.
# The call line plus up to five preceding lines are collected, then reduced
# to comment text only (everything after the first `#` on a line). Without
# that reduction the call line itself, which names the model, would satisfy
# the keyword search and the check could never fail.
# Heuristic: any `#` is treated as the start of a comment.
context=$(grep -E -B5 'run_batch[[:space:]]+score_semantic' "$ORCHESTRATOR" | head -n 10)
comment_text=$(printf '%s\n' "$context" | sed -n 's/^[^#]*#//p')
# -E alternation instead of `\|`, which is a GNU extension in basic regexes.
if printf '%s\n' "$comment_text" | grep -Eqi 'sonnet|upgraded|haiku'; then
  pass "Comment near score_semantic documents the model choice"
else
  fail "Expected a comment documenting why sonnet is used for score_semantic"
fi
49  
echo "Test 4: score_sites also uses sonnet (consistency check)"
# Locate the first non-comment `run_batch score_sites <model>` call.
# [[:space:]] replaces the non-POSIX `\s`.
score_sites_match=$(grep -v '^[[:space:]]*#' "$ORCHESTRATOR" \
  | grep -E 'run_batch[[:space:]]+score_sites[[:space:]]+' \
  | head -n 1)
if [ -n "$score_sites_match" ]; then
  case "$score_sites_match" in
    *sonnet*)
      pass "score_sites also uses sonnet model"
      ;;
    *)
      # A different model is reported but still counted as a pass: this is
      # an informational consistency check only.
      # The extraction accepts the same whitespace as the search above and
      # captures the whole argument token, so names containing digits or
      # dashes are shown in full.
      model=$(printf '%s\n' "$score_sites_match" \
        | sed 's/.*run_batch[[:space:]][[:space:]]*score_sites[[:space:]][[:space:]]*\([^[:space:]]*\).*/\1/')
      pass "score_sites uses '$model' model (different from score_semantic — may be intentional)"
      ;;
  esac
else
  fail "Could not find run_batch call for score_sites"
fi
62  
# Print a blank separator line and the totals, then exit with status 1
# unless the failure counter is exactly zero.
printf '\n'
printf 'Results: %s passed, %s failed\n' "$PASS" "$FAIL"
if ! [ "$FAIL" -eq 0 ]; then
  exit 1
fi