Cradicle Explorer

/ templates / benchmark.sh
benchmark.sh
 1  #!/usr/bin/env bash
 2  set -euo pipefail
 3  
 4  # benchmark.sh — ML training benchmark wrapper
 5  # Generated by cc-ml init. Called by rad-experiment benchmark.
 6  #
 7  # Runs the training command with unbuffered output, then extracts the
 8  # final (last-occurrence) value for each metric. The regex patterns in
 9  # autoresearch.yaml match the output format this script produces.
10  
11  export PYTHONUNBUFFERED=1
12  
13  TRAIN_CMD="__TRAIN_CMD__"
14  
15  # Capture both stdout and stderr (many ML frameworks log to stderr).
16  OUTPUT_FILE=$(mktemp)
17  trap 'rm -f "$OUTPUT_FILE"' EXIT
18  
19  echo "Running: $TRAIN_CMD" >&2
20  if ! eval "$TRAIN_CMD" > >(tee "$OUTPUT_FILE") 2>&1; then
21    echo "Training command failed" >&2
22    exit 1
23  fi
24  
25  # Extract the last occurrence of each metric from training output.
26  # "Last occurrence" = final epoch values, not intermediate logging.
27  #
28  # The patterns below are replaced by cc-ml init based on framework
29  # detection. They match common logging formats:
30  #   val_loss: 0.4523
31  #   accuracy = 95.2
32  #   train_loss : 0.312
33  #   METRIC val_loss=0.4523
34  
35  __METRIC_EXTRACTION__