skill-discovery-bench.sh
#!/bin/bash
# Skill discovery benchmark: measures use_skill trigger rate.
# Usage: ./scripts/skill-discovery-bench.sh [before|after]
#
# Sends every prompt in PROMPTS to `shan -y` (each run is capped at
# TIMEOUT_SECS seconds), echoes any "[skill-discovery]" lines the run wrote to
# stderr, and finally inspects the lines appended to the audit log during the
# benchmark to report how many of them mention "use_skill".
#
# Arguments:
#   $1 - free-form label printed in the results header (default: "before").
# Files:
#   ~/.shannon/logs/audit.log               read; must exist before the run.
#   ~/.shannon/logs/audit.log.bench_stderr  overwritten with each run's stderr.
set -euo pipefail

LABEL=${1:-"before"}
LOG=~/.shannon/logs/audit.log
STDERR_FILE="$LOG.bench_stderr"
TIMEOUT_SECS=60

# Fail early with a clear message instead of an opaque redirect error.
if [[ ! -r "$LOG" ]]; then
  printf 'error: audit log not readable: %s\n' "$LOG" >&2
  exit 1
fi

# Without this check a missing binary would just produce ten empty runs and a
# misleading "0 use_skill calls" result.
if ! command -v shan >/dev/null 2>&1; then
  printf 'error: shan not found in PATH\n' >&2
  exit 1
fi

# Number of lines already in the log; only lines after this are counted.
MARKER=$(wc -l <"$LOG")

PROMPTS=(
  "帮我创建一个 agent"
  "分析这个 PDF"
  "写一个 MCP server"
  "做一个 landing page"
  "帮我写项目状态更新"
  "我想让 AI 能查数据库"
  "这个 Go 函数有 bug,帮我看看"
  "帮我写个单元测试"
  "给这个报告换套主题"
  "把这套流程封装成可复用的模板"
)
PROMPT_COUNT=${#PROMPTS[@]}

for i in "${!PROMPTS[@]}"; do
  printf '[%d/%d] %s\n' "$((i + 1))" "$PROMPT_COUNT" "${PROMPTS[$i]}"
  shan -y "${PROMPTS[$i]}" </dev/null >/dev/null 2>"$STDERR_FILE" &
  BGPID=$!

  # Poll once per second until the run exits or the timeout elapses.
  for ((tick = 0; tick < TIMEOUT_SECS; tick++)); do
    kill -0 "$BGPID" 2>/dev/null || break
    sleep 1
  done

  # Both commands routinely return non-zero (kill: process already gone;
  # wait: the run's own exit status, e.g. 143 after SIGTERM). Under `set -e`
  # that would abort the whole benchmark, so the status is deliberately ignored.
  kill "$BGPID" 2>/dev/null || true
  wait "$BGPID" 2>/dev/null || true

  # grep exits 1 when nothing matches; that is not an error here.
  grep '^\[skill-discovery\]' "$STDERR_FILE" || true
  sleep 2
done

NEW_ENTRIES=$(tail -n +"$((MARKER + 1))" "$LOG")

# grep -c prints 0 but exits 1 when there is no match; keep the printed count.
SKILL_CALLS=$(grep -c '"use_skill"' <<<"$NEW_ENTRIES" || true)

# An empty string still yields one (empty) line through a pipe or here-string,
# so the empty case is handled explicitly to avoid reporting 1 instead of 0.
if [[ -z "$NEW_ENTRIES" ]]; then
  TOTAL_CALLS=0
else
  TOTAL_CALLS=$(wc -l <<<"$NEW_ENTRIES" | tr -d ' ')
fi

printf '\n'
printf '=== Results (%s) ===\n' "$LABEL"
printf 'use_skill calls: %s / %s prompts\n' "$SKILL_CALLS" "$PROMPT_COUNT"
printf 'Total tool calls: %s\n' "$TOTAL_CALLS"

# List the skill name of every use_skill entry. Each matching log line is
# parsed as JSON and its "input_summary" value is parsed as JSON again; lines
# where either step fails are skipped. The program is single-quoted so the
# shell leaves it untouched. `|| true` covers the no-match case, where grep
# exits 1 and pipefail would otherwise fail the pipeline.
grep '"use_skill"' <<<"$NEW_ENTRIES" | python3 -c '
import sys, json

for line in sys.stdin:
    try:
        entry = json.loads(line.strip())
        inp = json.loads(entry.get("input_summary", ""))
        name = inp.get("skill_name", "?")
    except (ValueError, TypeError, AttributeError):
        continue
    print(" -> " + str(name))
' || true