/ vulkan-shaders / sum_rows.comp
sum_rows.comp
#version 450

// Row-sum compute shader: for every row of data_a, writes the sum of that
// row's elements to data_d[row].
//
// One workgroup processes one row (row index = gl_WorkGroupID.x). Each of the
// BLOCK_SIZE invocations adds up the elements i = col, col + BLOCK_SIZE, ...
// of the row, and the per-invocation partial sums are then combined through
// the shared array `tmp`.
//
// `p.KX` is used as the number of elements per row. `p`, A_TYPE, D_TYPE and
// FLOAT_TYPE are not declared in this file; presumably they come from the
// includes below / the build's preprocessor definitions -- confirm there.

#include "generic_head.comp"
#include "types.comp"

#extension GL_EXT_control_flow_attributes : enable

// local_size_x is taken from specialization constant 0, the same id that
// BLOCK_SIZE is bound to below, so the two always agree.
layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;

layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};

layout (constant_id = 0) const uint BLOCK_SIZE = 32;

// One partial sum per invocation of the workgroup.
shared FLOAT_TYPE tmp[BLOCK_SIZE];

void main() {
    const uint row = gl_WorkGroupID.x;
    const uint col = gl_LocalInvocationID.x;

    // Accumulate in a private variable and publish to shared memory once,
    // instead of doing a shared-memory read-modify-write per element.
    // The order of additions per invocation is unchanged.
    FLOAT_TYPE sum = FLOAT_TYPE(0.0f);
    for (uint i = col; i < p.KX; i += BLOCK_SIZE) {
        sum += FLOAT_TYPE(data_a[row*p.KX + i]);
    }
    tmp[col] = sum;

    // All partial sums must be written before any invocation reads them.
    barrier();

    // Pairwise reduction: each step folds the upper half of the active range
    // onto the lower half. Because the range is halved with a shift, every
    // slot reaches tmp[0] only when BLOCK_SIZE is a power of two.
    [[unroll]] for (uint s = BLOCK_SIZE / 2; s > 0; s >>= 1) {
        if (col < s) {
            tmp[col] += tmp[col + s];
        }
        // Executed by every invocation (outside the `if`) so the barrier
        // stays in uniform control flow.
        barrier();
    }

    // Invocation 0 holds the full row sum.
    if (col == 0) {
        data_d[row] = D_TYPE(tmp[0]);
    }
}