timer.py
1 """ 2 Benchmark timing utilities for AI System Optimization Series. 3 Provides accurate timing for CPU and CUDA operations. 4 """ 5 6 import statistics 7 import time 8 from collections.abc import Callable 9 from dataclasses import dataclass 10 from typing import Any 11 12 13 @dataclass 14 class TimingResult: 15 """Result of a benchmark timing run.""" 16 17 mean_ms: float 18 std_ms: float 19 min_ms: float 20 max_ms: float 21 iterations: int 22 23 def __repr__(self) -> str: 24 return ( 25 f"TimingResult(mean={self.mean_ms:.3f}ms, " 26 f"std={self.std_ms:.3f}ms, " 27 f"min={self.min_ms:.3f}ms, " 28 f"max={self.max_ms:.3f}ms, " 29 f"iters={self.iterations})" 30 ) 31 32 33 def _try_cuda_sync() -> bool: 34 """Try to synchronize CUDA if available.""" 35 try: 36 import torch 37 38 if torch.cuda.is_available(): 39 torch.cuda.synchronize() 40 return True 41 except ImportError: 42 pass 43 return False 44 45 46 def benchmark_function( 47 func: Callable[[], Any], warmup_iters: int = 10, bench_iters: int = 100, sync_cuda: bool = True 48 ) -> TimingResult: 49 """ 50 Benchmark a function with warmup and multiple iterations. 51 52 Args: 53 func: Function to benchmark (should take no arguments) 54 warmup_iters: Number of warmup iterations 55 bench_iters: Number of benchmark iterations 56 sync_cuda: Whether to synchronize CUDA before/after timing 57 58 Returns: 59 TimingResult with timing statistics 60 """ 61 # Warmup 62 for _ in range(warmup_iters): 63 func() 64 if sync_cuda: 65 _try_cuda_sync() 66 67 # Benchmark 68 times_ms: list[float] = [] 69 for _ in range(bench_iters): 70 if sync_cuda: 71 _try_cuda_sync() 72 73 start = time.perf_counter() 74 func() 75 76 if sync_cuda: 77 _try_cuda_sync() 78 79 end = time.perf_counter() 80 times_ms.append((end - start) * 1000) 81 82 return TimingResult( 83 mean_ms=statistics.mean(times_ms), 84 std_ms=statistics.stdev(times_ms) if len(times_ms) > 1 else 0.0, 85 min_ms=min(times_ms), 86 max_ms=max(times_ms), 87 iterations=bench_iters, 88 ) 89 90 91 class CUDATimer: 92 """CUDA event-based timer for more accurate GPU timing.""" 93 94 def __init__(self): 95 self._start_event = None 96 self._end_event = None 97 self._torch_available = False 98 99 try: 100 import torch 101 102 if torch.cuda.is_available(): 103 self._torch_available = True 104 self._start_event = torch.cuda.Event(enable_timing=True) 105 self._end_event = torch.cuda.Event(enable_timing=True) 106 except ImportError: 107 pass 108 109 def start(self) -> None: 110 """Record start event.""" 111 if self._torch_available: 112 self._start_event.record() 113 114 def stop(self) -> float: 115 """Record end event and return elapsed time in ms.""" 116 if self._torch_available: 117 self._end_event.record() 118 self._end_event.synchronize() 119 return self._start_event.elapsed_time(self._end_event) 120 return 0.0 121 122 @property 123 def available(self) -> bool: 124 return self._torch_available 125 126 127 def benchmark_cuda_function( 128 func: Callable[[], Any], warmup_iters: int = 10, bench_iters: int = 100 129 ) -> TimingResult | None: 130 """ 131 Benchmark a CUDA function using CUDA events for accurate timing. 132 133 Args: 134 func: Function to benchmark 135 warmup_iters: Number of warmup iterations 136 bench_iters: Number of benchmark iterations 137 138 Returns: 139 TimingResult or None if CUDA is not available 140 """ 141 timer = CUDATimer() 142 if not timer.available: 143 return None 144 145 # Warmup 146 for _ in range(warmup_iters): 147 func() 148 149 # Benchmark 150 times_ms: list[float] = [] 151 for _ in range(bench_iters): 152 timer.start() 153 func() 154 elapsed = timer.stop() 155 times_ms.append(elapsed) 156 157 return TimingResult( 158 mean_ms=statistics.mean(times_ms), 159 std_ms=statistics.stdev(times_ms) if len(times_ms) > 1 else 0.0, 160 min_ms=min(times_ms), 161 max_ms=max(times_ms), 162 iterations=bench_iters, 163 )