kernels.cuh
// kernels.cuh
// CUDA kernel declarations for Python bindings
//
// Only host-callable launcher prototypes live here; the definitions are in a
// separate translation unit. Whether a launcher blocks until its work has
// finished cannot be determined from this header -- check the definitions
// before reading results on the host.
#pragma once

// Scale vector elements in place: data[i] *= scale
//   data  - buffer of n floats, overwritten with the scaled values
//           (NOTE(review): presumably a device pointer -- confirm at the definition)
//   scale - multiplier applied to every element
//   n     - number of elements in data
void launch_vector_scale(float* data, float scale, int n);

// SAXPY: y = a*x + y
//   y - input/output buffer of n floats, updated in place
//   a - scalar multiplier for x
//   x - read-only input buffer of n floats
//   n - number of elements in x and y
//   (NOTE(review): x and y are presumably device pointers -- confirm at the definition)
void launch_saxpy(float* y, float a, const float* x, int n);

// Dot product (result written to device memory)
//   result - destination for the scalar result; per the note above this is
//            device memory, so the host must copy it back before reading it
//   a, b   - read-only input buffers of n floats each
//            (NOTE(review): presumably device pointers as well -- confirm)
//   n      - number of elements in a and b
void launch_dot_product(float* result, const float* a, const float* b, int n);