benchmark.sh
#!/usr/bin/env bash
set -euo pipefail

# benchmark.sh — ML training benchmark wrapper
# Generated by cc-ml init. Called by rad-experiment benchmark.
#
# Runs the training command with unbuffered output, then extracts the
# final (last-occurrence) value for each metric. The regex patterns in
# autoresearch.yaml match the output format this script produces.
#
# Exit status: 1 if the training command (or the capture pipeline) fails;
# otherwise whatever the metric-extraction section below yields.

# Make Python flush every line immediately so output is streamed live and
# nothing is lost in a buffer if the process dies.
export PYTHONUNBUFFERED=1

# Placeholder substituted by cc-ml init with the real training command.
TRAIN_CMD="__TRAIN_CMD__"

# Capture both stdout and stderr (many ML frameworks log to stderr).
OUTPUT_FILE=$(mktemp)
# Remove the capture file on every exit path (success, failure, set -e abort).
trap 'rm -f -- "$OUTPUT_FILE"' EXIT

printf 'Running: %s\n' "$TRAIN_CMD" >&2

# Stream the combined output to our stdout while saving a copy for metric
# extraction. A real pipeline is used instead of `> >(tee ...)` because bash
# does not wait for a process substitution to finish: the last lines of
# output (the final-epoch metrics) might not have reached OUTPUT_FILE yet
# when extraction starts. With `pipefail` (set above) the pipeline still
# reports failure when the training command exits non-zero.
#
# NOTE: the training command runs in the pipeline's subshell, so any `cd` or
# variable assignments it performs do not leak into the extraction step.
# TRAIN_CMD is a full command line written by cc-ml init, hence the eval.
if ! eval "$TRAIN_CMD" 2>&1 | tee "$OUTPUT_FILE"; then
  echo "Training command failed" >&2
  exit 1
fi

# Extract the last occurrence of each metric from training output.
# "Last occurrence" = final epoch values, not intermediate logging.
#
# The patterns below are replaced by cc-ml init based on framework
# detection. They match common logging formats:
#   val_loss: 0.4523
#   accuracy = 95.2
#   train_loss : 0.312
#   METRIC val_loss=0.4523

__METRIC_EXTRACTION__