/ vulkan-shaders / dequant_funcs.comp
dequant_funcs.comp
// Format-specific dequantize() variants. Each variant is guarded by its own
// DATA_A_* macro and returns two dequantized values as a vec2. All of them
// read from `data_a`, which is declared outside this section.

// Every format other than F32/F16 requires the explicit 8-bit integer types.
#if !(defined(DATA_A_F32) || defined(DATA_A_F16))
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#endif

#if defined(DATA_A_F32)
// F32: returns the two consecutive elements starting at a_offset + ib.
// iqs is not used by this variant.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint first = a_offset + ib;
    return vec2(data_a[first], data_a[first + 1]);
}
#endif

#if defined(DATA_A_F16)
// F16: returns the two consecutive elements starting at a_offset + ib.
// iqs is not used by this variant.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint first = a_offset + ib;
    return vec2(data_a[first], data_a[first + 1]);
}
#endif

#if defined(DATA_A_Q4_0)
// Q4_0: splits qs[iqs] into its low four bits (x) and the remaining upper
// bits (y), subtracts 8 from both, then multiplies by the block's d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const float scale = float(data_a[blk].d);
    const uint packed = uint(data_a[blk].qs[iqs]);
    const vec2 nibbles = vec2(packed & 0xF, packed >> 4);
    return (nibbles - 8.0f) * scale;
}
#endif

#if defined(DATA_A_Q4_1)
// Q4_1: splits qs[iqs] into its low four bits (x) and the remaining upper
// bits (y), multiplies by the block's d, then adds the block's m.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const float scale = float(data_a[blk].d);
    const float offset = float(data_a[blk].m);
    const uint packed = uint(data_a[blk].qs[iqs]);
    const vec2 nibbles = vec2(packed & 0xF, packed >> 4);
    return nibbles * scale + offset;
}
#endif

#if defined(DATA_A_Q5_0)
// Q5_0: like the 4-bit split of qs[iqs], but each value gains a fifth bit
// (bit 4) taken from the 32-bit word assembled from qh[0] (low half) and
// qh[1] (high half). The result is (value - 16) * d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const float scale = float(data_a[blk].d);
    const uint high_word = (uint(data_a[blk].qh[1]) << 16) | data_a[blk].qh[0];
    // x takes bit `iqs` of high_word, y takes bit `iqs + 16`; both are moved
    // into bit position 4 (mask 0x10).
    const ivec2 fifth_bit = ivec2(((high_word >> iqs) << 4) & 0x10,
                                  (high_word >> (iqs + 12)) & 0x10);
    const uint packed = uint(data_a[blk].qs[iqs]);
    const uint lo = (packed & 0xF) | fifth_bit.x;
    const uint hi = (packed >> 4) | fifth_bit.y;
    return (vec2(lo, hi) - 16.0f) * scale;
}
#endif

#if defined(DATA_A_Q5_1)
// Q5_1: like the 4-bit split of qs[iqs], but each value gains a fifth bit
// (bit 4) taken from the block's qh word. The result is value * d + m.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const float scale = float(data_a[blk].d);
    const float offset = float(data_a[blk].m);
    const uint high_word = data_a[blk].qh;
    // x takes bit `iqs` of high_word, y takes bit `iqs + 16`; both are moved
    // into bit position 4 (mask 0x10).
    const ivec2 fifth_bit = ivec2(((high_word >> iqs) << 4) & 0x10,
                                  (high_word >> (iqs + 12)) & 0x10);
    const uint packed = uint(data_a[blk].qs[iqs]);
    const uint lo = (packed & 0xF) | fifth_bit.x;
    const uint hi = (packed >> 4) | fifth_bit.y;
    return vec2(lo, hi) * scale + offset;
}
#endif

#if defined(DATA_A_Q8_0)
// Q8_0: converts qs[iqs] and qs[iqs + 1] through int and multiplies both by
// the block's d.
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
    const uint blk = a_offset + ib;
    const float scale = float(data_a[blk].d);
    const int q0 = int(data_a[blk].qs[iqs]);
    const int q1 = int(data_a[blk].qs[iqs + 1]);
    return vec2(q0, q1) * scale;
}
#endif