/ kompute-shaders / op_gelu.comp
op_gelu.comp
 1  #version 450
 2  
 3  #include "common.comp"
 4  
 5  layout(local_size_x = 1) in;
 6  
 7  layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; };
 8  layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; };
 9  layout(push_constant) uniform PushConstants {
10      uint inOff;
11      uint outOff;
12  } pcs;
13  
// Element-wise activation: out = 0.5 * v * (1 + tanh(c * v * (1 + a * v^2))),
// with c = SQRT_2_OVER_PI and a = GELU_COEF_A taken from common.comp
// (presumably the usual tanh-approximation GELU constants; their values are
// not visible in this file).
//
// Each workgroup handles 8 consecutive elements, starting at
// gl_WorkGroupID.x * 8.
//
// NOTE(review): there is no bounds check, so the dispatch size and buffer
// lengths are presumably arranged by the host so that every touched index is
// valid (confirm against the caller).
void main() {
    // Number of consecutive elements processed by a single workgroup.
    const uint elemsPerGroup = 8u;
    const uint firstElem = gl_WorkGroupID.x * elemsPerGroup;

    for (uint k = 0u; k < elemsPerGroup; k++) {
        const uint idx = firstElem + k;
        const float v = in_[pcs.inOff + idx];

        // Argument of tanh, limited to [-15, 15] before evaluation
        // (presumably to keep tanh well-behaved for large |v|).
        const float tanhArg = SQRT_2_OVER_PI*v*(1.0 + GELU_COEF_A*v*v);
        const float t = tanh(clamp(tanhArg, -15.0, 15.0));

        out_[pcs.outOff + idx] = 0.5*v*(1.0 + t);
    }
}