# common/benchmark/timer.py
  1  """
  2  Benchmark timing utilities for AI System Optimization Series.
  3  Provides accurate timing for CPU and CUDA operations.
  4  """
  5  
  6  import statistics
  7  import time
  8  from collections.abc import Callable
  9  from dataclasses import dataclass
 10  from typing import Any
 11  
 12  
 13  @dataclass
 14  class TimingResult:
 15      """Result of a benchmark timing run."""
 16  
 17      mean_ms: float
 18      std_ms: float
 19      min_ms: float
 20      max_ms: float
 21      iterations: int
 22  
 23      def __repr__(self) -> str:
 24          return (
 25              f"TimingResult(mean={self.mean_ms:.3f}ms, "
 26              f"std={self.std_ms:.3f}ms, "
 27              f"min={self.min_ms:.3f}ms, "
 28              f"max={self.max_ms:.3f}ms, "
 29              f"iters={self.iterations})"
 30          )
 31  
 32  
 33  def _try_cuda_sync() -> bool:
 34      """Try to synchronize CUDA if available."""
 35      try:
 36          import torch
 37  
 38          if torch.cuda.is_available():
 39              torch.cuda.synchronize()
 40              return True
 41      except ImportError:
 42          pass
 43      return False
 44  
 45  
 46  def benchmark_function(
 47      func: Callable[[], Any], warmup_iters: int = 10, bench_iters: int = 100, sync_cuda: bool = True
 48  ) -> TimingResult:
 49      """
 50      Benchmark a function with warmup and multiple iterations.
 51  
 52      Args:
 53          func: Function to benchmark (should take no arguments)
 54          warmup_iters: Number of warmup iterations
 55          bench_iters: Number of benchmark iterations
 56          sync_cuda: Whether to synchronize CUDA before/after timing
 57  
 58      Returns:
 59          TimingResult with timing statistics
 60      """
 61      # Warmup
 62      for _ in range(warmup_iters):
 63          func()
 64          if sync_cuda:
 65              _try_cuda_sync()
 66  
 67      # Benchmark
 68      times_ms: list[float] = []
 69      for _ in range(bench_iters):
 70          if sync_cuda:
 71              _try_cuda_sync()
 72  
 73          start = time.perf_counter()
 74          func()
 75  
 76          if sync_cuda:
 77              _try_cuda_sync()
 78  
 79          end = time.perf_counter()
 80          times_ms.append((end - start) * 1000)
 81  
 82      return TimingResult(
 83          mean_ms=statistics.mean(times_ms),
 84          std_ms=statistics.stdev(times_ms) if len(times_ms) > 1 else 0.0,
 85          min_ms=min(times_ms),
 86          max_ms=max(times_ms),
 87          iterations=bench_iters,
 88      )
 89  
 90  
 91  class CUDATimer:
 92      """CUDA event-based timer for more accurate GPU timing."""
 93  
 94      def __init__(self):
 95          self._start_event = None
 96          self._end_event = None
 97          self._torch_available = False
 98  
 99          try:
100              import torch
101  
102              if torch.cuda.is_available():
103                  self._torch_available = True
104                  self._start_event = torch.cuda.Event(enable_timing=True)
105                  self._end_event = torch.cuda.Event(enable_timing=True)
106          except ImportError:
107              pass
108  
109      def start(self) -> None:
110          """Record start event."""
111          if self._torch_available:
112              self._start_event.record()
113  
114      def stop(self) -> float:
115          """Record end event and return elapsed time in ms."""
116          if self._torch_available:
117              self._end_event.record()
118              self._end_event.synchronize()
119              return self._start_event.elapsed_time(self._end_event)
120          return 0.0
121  
122      @property
123      def available(self) -> bool:
124          return self._torch_available
125  
126  
def benchmark_cuda_function(
    func: Callable[[], Any], warmup_iters: int = 10, bench_iters: int = 100
) -> TimingResult | None:
    """
    Benchmark a CUDA function using CUDA events for accurate timing.

    Args:
        func: Function to benchmark
        warmup_iters: Number of warmup iterations
        bench_iters: Number of benchmark iterations

    Returns:
        TimingResult or None if CUDA is not available
    """
    timer = CUDATimer()
    if not timer.available:
        return None

    # Warmup so one-time costs (kernel compilation, caching) are excluded.
    for _ in range(warmup_iters):
        func()

    def _timed_call() -> float:
        # Events bracket the call; stop() synchronizes before reading.
        timer.start()
        func()
        return timer.stop()

    samples = [_timed_call() for _ in range(bench_iters)]

    return TimingResult(
        mean_ms=statistics.mean(samples),
        std_ms=statistics.stdev(samples) if len(samples) > 1 else 0.0,
        min_ms=min(samples),
        max_ms=max(samples),
        iterations=bench_iters,
    )