sw_proctex.cpp
1 // Copyright 2017 Citra Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #include <array> 6 #include <cmath> 7 #include "video_core/renderer_software/sw_proctex.h" 8 9 namespace SwRenderer { 10 11 namespace { 12 using ProcTexClamp = Pica::TexturingRegs::ProcTexClamp; 13 using ProcTexShift = Pica::TexturingRegs::ProcTexShift; 14 using ProcTexCombiner = Pica::TexturingRegs::ProcTexCombiner; 15 using ProcTexFilter = Pica::TexturingRegs::ProcTexFilter; 16 using Pica::f16; 17 18 float LookupLUT(const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& lut, float coord) { 19 // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and 20 // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using 21 // value entries and difference entries. 22 coord *= 128; 23 const int index_int = std::min(static_cast<int>(coord), 127); 24 const float frac = coord - index_int; 25 return lut[index_int].ToFloat() + frac * lut[index_int].DiffToFloat(); 26 } 27 28 // These function are used to generate random noise for procedural texture. Their results are 29 // verified against real hardware, but it's not known if the algorithm is the same as hardware. 30 unsigned int NoiseRand1D(unsigned int v) { 31 static constexpr std::array<unsigned int, 16> table{ 32 {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}}; 33 return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF]; 34 } 35 36 float NoiseRand2D(unsigned int x, unsigned int y) { 37 static constexpr std::array<unsigned int, 16> table{ 38 {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}}; 39 unsigned int u2 = NoiseRand1D(x); 40 unsigned int v2 = NoiseRand1D(y); 41 v2 += ((u2 & 3) == 1) ? 4 : 0; 42 v2 ^= (u2 & 1) * 6; 43 v2 += 10 + u2; 44 v2 &= 0xF; 45 v2 ^= table[u2]; 46 return -1.0f + v2 * 2.0f / 15.0f; 47 } 48 49 float NoiseCoef(float u, float v, const Pica::TexturingRegs& regs, 50 const Pica::PicaCore::ProcTex& state) { 51 const float freq_u = f16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(); 52 const float freq_v = f16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(); 53 const float phase_u = f16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(); 54 const float phase_v = f16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(); 55 const float x = 9 * freq_u * std::abs(u + phase_u); 56 const float y = 9 * freq_v * std::abs(v + phase_v); 57 const int x_int = static_cast<int>(x); 58 const int y_int = static_cast<int>(y); 59 const float x_frac = x - x_int; 60 const float y_frac = y - y_int; 61 62 const float g0 = NoiseRand2D(x_int, y_int) * (x_frac + y_frac); 63 const float g1 = NoiseRand2D(x_int + 1, y_int) * (x_frac + y_frac - 1); 64 const float g2 = NoiseRand2D(x_int, y_int + 1) * (x_frac + y_frac - 1); 65 const float g3 = NoiseRand2D(x_int + 1, y_int + 1) * (x_frac + y_frac - 2); 66 const float x_noise = LookupLUT(state.noise_table, x_frac); 67 const float y_noise = LookupLUT(state.noise_table, y_frac); 68 return Common::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise); 69 } 70 71 float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) { 72 const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f; 73 switch (mode) { 74 case ProcTexShift::None: 75 return 0; 76 case ProcTexShift::Odd: 77 return offset * (((int)v / 2) % 2); 78 case ProcTexShift::Even: 79 return offset * ((((int)v + 1) / 2) % 2); 80 default: 81 LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode); 82 return 0; 83 } 84 }; 85 86 void ClampCoord(float& coord, ProcTexClamp mode) { 87 switch (mode) { 88 case ProcTexClamp::ToZero: 89 if (coord > 1.0f) 90 coord = 0.0f; 91 break; 92 case ProcTexClamp::ToEdge: 93 coord = std::min(coord, 1.0f); 94 break; 95 case ProcTexClamp::SymmetricalRepeat: 96 coord = coord - std::floor(coord); 97 break; 98 case ProcTexClamp::MirroredRepeat: { 99 int integer = static_cast<int>(coord); 100 float frac = coord - integer; 101 coord = (integer % 2) == 0 ? frac : (1.0f - frac); 102 break; 103 } 104 case ProcTexClamp::Pulse: 105 if (coord <= 0.5f) 106 coord = 0.0f; 107 else 108 coord = 1.0f; 109 break; 110 default: 111 LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode); 112 coord = std::min(coord, 1.0f); 113 break; 114 } 115 } 116 117 float CombineAndMap(float u, float v, ProcTexCombiner combiner, 118 const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& map_table) { 119 float f; 120 switch (combiner) { 121 case ProcTexCombiner::U: 122 f = u; 123 break; 124 case ProcTexCombiner::U2: 125 f = u * u; 126 break; 127 case ProcTexCombiner::V: 128 f = v; 129 break; 130 case ProcTexCombiner::V2: 131 f = v * v; 132 break; 133 case ProcTexCombiner::Add: 134 f = (u + v) * 0.5f; 135 break; 136 case ProcTexCombiner::Add2: 137 f = (u * u + v * v) * 0.5f; 138 break; 139 case ProcTexCombiner::SqrtAdd2: 140 f = std::min(std::sqrt(u * u + v * v), 1.0f); 141 break; 142 case ProcTexCombiner::Min: 143 f = std::min(u, v); 144 break; 145 case ProcTexCombiner::Max: 146 f = std::max(u, v); 147 break; 148 case ProcTexCombiner::RMax: 149 f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f); 150 break; 151 default: 152 LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner); 153 f = 0.0f; 154 break; 155 } 156 return LookupLUT(map_table, f); 157 } 158 } // Anonymous namespace 159 160 Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs, 161 const Pica::PicaCore::ProcTex& state) { 162 u = std::abs(u); 163 v = std::abs(v); 164 165 // Get shift offset before noise generation 166 const float u_shift = GetShiftOffset(v, regs.proctex.u_shift, regs.proctex.u_clamp); 167 const float v_shift = GetShiftOffset(u, regs.proctex.v_shift, regs.proctex.v_clamp); 168 169 // Generate noise 170 if (regs.proctex.noise_enable) { 171 float noise = NoiseCoef(u, v, regs, state); 172 u += noise * regs.proctex_noise_u.amplitude / 4095.0f; 173 v += noise * regs.proctex_noise_v.amplitude / 4095.0f; 174 u = std::abs(u); 175 v = std::abs(v); 176 } 177 178 // Shift 179 u += u_shift; 180 v += v_shift; 181 182 // Clamp 183 ClampCoord(u, regs.proctex.u_clamp); 184 ClampCoord(v, regs.proctex.v_clamp); 185 186 // Combine and map 187 const float lut_coord = CombineAndMap(u, v, regs.proctex.color_combiner, state.color_map_table); 188 189 // Look up the color 190 // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1] 191 const u32 offset = regs.proctex_lut_offset.level0; 192 const u32 width = regs.proctex_lut.width; 193 const float index = offset + (lut_coord * (width - 1)); 194 Common::Vec4<u8> final_color; 195 // TODO(wwylele): implement mipmap 196 switch (regs.proctex_lut.filter) { 197 case ProcTexFilter::Linear: 198 case ProcTexFilter::LinearMipmapLinear: 199 case ProcTexFilter::LinearMipmapNearest: { 200 const int index_int = static_cast<int>(index); 201 const float frac = index - index_int; 202 const auto color_value = state.color_table[index_int].ToVector().Cast<float>(); 203 const auto color_diff = state.color_diff_table[index_int].ToVector().Cast<float>(); 204 final_color = (color_value + frac * color_diff).Cast<u8>(); 205 break; 206 } 207 case ProcTexFilter::Nearest: 208 case ProcTexFilter::NearestMipmapLinear: 209 case ProcTexFilter::NearestMipmapNearest: 210 final_color = state.color_table[static_cast<int>(std::round(index))].ToVector(); 211 break; 212 } 213 214 if (regs.proctex.separate_alpha) { 215 // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It 216 // uses the output of CombineAndMap directly instead. 217 const float final_alpha = 218 CombineAndMap(u, v, regs.proctex.alpha_combiner, state.alpha_map_table); 219 return Common::MakeVec<u8>(final_color.rgb(), static_cast<u8>(final_alpha * 255)); 220 } else { 221 return final_color; 222 } 223 } 224 225 } // namespace SwRenderer