kernels.cuh
// kernels.cuh
// CUDA kernel declarations for Python bindings.
//
// This header only declares the launch functions below; their definitions
// are not part of this file.
#pragma once

// Scale vector elements: data[i] *= scale.
//
//   data  - vector that is scaled in place (each element is multiplied by
//           `scale`).
//   scale - multiplier applied to every element.
//   n     - presumably the number of elements in `data`; confirm against the
//           definition.
//
// NOTE(review): `data` is presumably a device pointer - confirm with the
// implementation and the Python binding code.
void launch_vector_scale(float *data, float scale, int n);

// SAXPY: y = a*x + y.
//
//   y - input/output vector; receives the result a*x + y.
//   a - scalar multiplier applied to `x`.
//   x - read-only input vector.
//   n - presumably the number of elements in `x` and `y`; confirm against the
//       definition.
//
// NOTE(review): `x` and `y` are presumably device pointers - confirm.
void launch_saxpy(float *y, float a, const float *x, int n);

// Dot product of `a` and `b` (result written to device memory).
//
//   result - destination in device memory that receives the dot product.
//   a, b   - read-only input vectors.
//   n      - presumably the number of elements in `a` and `b`; confirm
//            against the definition.
//
// NOTE(review): whether the call synchronizes before returning is not visible
// from this header - check the definition before reading `result`.
void launch_dot_product(float *result, const float *a, const float *b, int n);