/ src / video_core / renderer_software / sw_proctex.cpp
sw_proctex.cpp
  1  // Copyright 2017 Citra Emulator Project
  2  // Licensed under GPLv2 or any later version
  3  // Refer to the license.txt file included.
  4  
#include <algorithm>
#include <array>
#include <cmath>
#include <iterator>
#include "video_core/renderer_software/sw_proctex.h"
  8  
  9  namespace SwRenderer {
 10  
 11  namespace {
 12  using ProcTexClamp = Pica::TexturingRegs::ProcTexClamp;
 13  using ProcTexShift = Pica::TexturingRegs::ProcTexShift;
 14  using ProcTexCombiner = Pica::TexturingRegs::ProcTexCombiner;
 15  using ProcTexFilter = Pica::TexturingRegs::ProcTexFilter;
 16  using Pica::f16;
 17  
 18  float LookupLUT(const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& lut, float coord) {
 19      // For NoiseLUT/ColorMap/AlphaMap, coord=0.0 is lut[0], coord=127.0/128.0 is lut[127] and
 20      // coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
 21      // value entries and difference entries.
 22      coord *= 128;
 23      const int index_int = std::min(static_cast<int>(coord), 127);
 24      const float frac = coord - index_int;
 25      return lut[index_int].ToFloat() + frac * lut[index_int].DiffToFloat();
 26  }
 27  
// These functions are used to generate random noise for the procedural texture. Their results
// are verified against real hardware, but it's not known if the algorithm is the same as the
// one used by the hardware.
 30  unsigned int NoiseRand1D(unsigned int v) {
 31      static constexpr std::array<unsigned int, 16> table{
 32          {0, 4, 10, 8, 4, 9, 7, 12, 5, 15, 13, 14, 11, 15, 2, 11}};
 33      return ((v % 9 + 2) * 3 & 0xF) ^ table[(v / 9) & 0xF];
 34  }
 35  
 36  float NoiseRand2D(unsigned int x, unsigned int y) {
 37      static constexpr std::array<unsigned int, 16> table{
 38          {10, 2, 15, 8, 0, 7, 4, 5, 5, 13, 2, 6, 13, 9, 3, 14}};
 39      unsigned int u2 = NoiseRand1D(x);
 40      unsigned int v2 = NoiseRand1D(y);
 41      v2 += ((u2 & 3) == 1) ? 4 : 0;
 42      v2 ^= (u2 & 1) * 6;
 43      v2 += 10 + u2;
 44      v2 &= 0xF;
 45      v2 ^= table[u2];
 46      return -1.0f + v2 * 2.0f / 15.0f;
 47  }
 48  
 49  float NoiseCoef(float u, float v, const Pica::TexturingRegs& regs,
 50                  const Pica::PicaCore::ProcTex& state) {
 51      const float freq_u = f16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32();
 52      const float freq_v = f16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32();
 53      const float phase_u = f16::FromRaw(regs.proctex_noise_u.phase).ToFloat32();
 54      const float phase_v = f16::FromRaw(regs.proctex_noise_v.phase).ToFloat32();
 55      const float x = 9 * freq_u * std::abs(u + phase_u);
 56      const float y = 9 * freq_v * std::abs(v + phase_v);
 57      const int x_int = static_cast<int>(x);
 58      const int y_int = static_cast<int>(y);
 59      const float x_frac = x - x_int;
 60      const float y_frac = y - y_int;
 61  
 62      const float g0 = NoiseRand2D(x_int, y_int) * (x_frac + y_frac);
 63      const float g1 = NoiseRand2D(x_int + 1, y_int) * (x_frac + y_frac - 1);
 64      const float g2 = NoiseRand2D(x_int, y_int + 1) * (x_frac + y_frac - 1);
 65      const float g3 = NoiseRand2D(x_int + 1, y_int + 1) * (x_frac + y_frac - 2);
 66      const float x_noise = LookupLUT(state.noise_table, x_frac);
 67      const float y_noise = LookupLUT(state.noise_table, y_frac);
 68      return Common::BilinearInterp(g0, g1, g2, g3, x_noise, y_noise);
 69  }
 70  
 71  float GetShiftOffset(float v, ProcTexShift mode, ProcTexClamp clamp_mode) {
 72      const float offset = (clamp_mode == ProcTexClamp::MirroredRepeat) ? 1 : 0.5f;
 73      switch (mode) {
 74      case ProcTexShift::None:
 75          return 0;
 76      case ProcTexShift::Odd:
 77          return offset * (((int)v / 2) % 2);
 78      case ProcTexShift::Even:
 79          return offset * ((((int)v + 1) / 2) % 2);
 80      default:
 81          LOG_CRITICAL(HW_GPU, "Unknown shift mode {}", mode);
 82          return 0;
 83      }
 84  };
 85  
 86  void ClampCoord(float& coord, ProcTexClamp mode) {
 87      switch (mode) {
 88      case ProcTexClamp::ToZero:
 89          if (coord > 1.0f)
 90              coord = 0.0f;
 91          break;
 92      case ProcTexClamp::ToEdge:
 93          coord = std::min(coord, 1.0f);
 94          break;
 95      case ProcTexClamp::SymmetricalRepeat:
 96          coord = coord - std::floor(coord);
 97          break;
 98      case ProcTexClamp::MirroredRepeat: {
 99          int integer = static_cast<int>(coord);
100          float frac = coord - integer;
101          coord = (integer % 2) == 0 ? frac : (1.0f - frac);
102          break;
103      }
104      case ProcTexClamp::Pulse:
105          if (coord <= 0.5f)
106              coord = 0.0f;
107          else
108              coord = 1.0f;
109          break;
110      default:
111          LOG_CRITICAL(HW_GPU, "Unknown clamp mode {}", mode);
112          coord = std::min(coord, 1.0f);
113          break;
114      }
115  }
116  
117  float CombineAndMap(float u, float v, ProcTexCombiner combiner,
118                      const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& map_table) {
119      float f;
120      switch (combiner) {
121      case ProcTexCombiner::U:
122          f = u;
123          break;
124      case ProcTexCombiner::U2:
125          f = u * u;
126          break;
127      case ProcTexCombiner::V:
128          f = v;
129          break;
130      case ProcTexCombiner::V2:
131          f = v * v;
132          break;
133      case ProcTexCombiner::Add:
134          f = (u + v) * 0.5f;
135          break;
136      case ProcTexCombiner::Add2:
137          f = (u * u + v * v) * 0.5f;
138          break;
139      case ProcTexCombiner::SqrtAdd2:
140          f = std::min(std::sqrt(u * u + v * v), 1.0f);
141          break;
142      case ProcTexCombiner::Min:
143          f = std::min(u, v);
144          break;
145      case ProcTexCombiner::Max:
146          f = std::max(u, v);
147          break;
148      case ProcTexCombiner::RMax:
149          f = std::min(((u + v) * 0.5f + std::sqrt(u * u + v * v)) * 0.5f, 1.0f);
150          break;
151      default:
152          LOG_CRITICAL(HW_GPU, "Unknown combiner {}", combiner);
153          f = 0.0f;
154          break;
155      }
156      return LookupLUT(map_table, f);
157  }
158  } // Anonymous namespace
159  
160  Common::Vec4<u8> ProcTex(float u, float v, const Pica::TexturingRegs& regs,
161                           const Pica::PicaCore::ProcTex& state) {
162      u = std::abs(u);
163      v = std::abs(v);
164  
165      // Get shift offset before noise generation
166      const float u_shift = GetShiftOffset(v, regs.proctex.u_shift, regs.proctex.u_clamp);
167      const float v_shift = GetShiftOffset(u, regs.proctex.v_shift, regs.proctex.v_clamp);
168  
169      // Generate noise
170      if (regs.proctex.noise_enable) {
171          float noise = NoiseCoef(u, v, regs, state);
172          u += noise * regs.proctex_noise_u.amplitude / 4095.0f;
173          v += noise * regs.proctex_noise_v.amplitude / 4095.0f;
174          u = std::abs(u);
175          v = std::abs(v);
176      }
177  
178      // Shift
179      u += u_shift;
180      v += v_shift;
181  
182      // Clamp
183      ClampCoord(u, regs.proctex.u_clamp);
184      ClampCoord(v, regs.proctex.v_clamp);
185  
186      // Combine and map
187      const float lut_coord = CombineAndMap(u, v, regs.proctex.color_combiner, state.color_map_table);
188  
189      // Look up the color
190      // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
191      const u32 offset = regs.proctex_lut_offset.level0;
192      const u32 width = regs.proctex_lut.width;
193      const float index = offset + (lut_coord * (width - 1));
194      Common::Vec4<u8> final_color;
195      // TODO(wwylele): implement mipmap
196      switch (regs.proctex_lut.filter) {
197      case ProcTexFilter::Linear:
198      case ProcTexFilter::LinearMipmapLinear:
199      case ProcTexFilter::LinearMipmapNearest: {
200          const int index_int = static_cast<int>(index);
201          const float frac = index - index_int;
202          const auto color_value = state.color_table[index_int].ToVector().Cast<float>();
203          const auto color_diff = state.color_diff_table[index_int].ToVector().Cast<float>();
204          final_color = (color_value + frac * color_diff).Cast<u8>();
205          break;
206      }
207      case ProcTexFilter::Nearest:
208      case ProcTexFilter::NearestMipmapLinear:
209      case ProcTexFilter::NearestMipmapNearest:
210          final_color = state.color_table[static_cast<int>(std::round(index))].ToVector();
211          break;
212      }
213  
214      if (regs.proctex.separate_alpha) {
215          // Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
216          // uses the output of CombineAndMap directly instead.
217          const float final_alpha =
218              CombineAndMap(u, v, regs.proctex.alpha_combiner, state.alpha_map_table);
219          return Common::MakeVec<u8>(final_color.rgb(), static_cast<u8>(final_alpha * 255));
220      } else {
221          return final_color;
222      }
223  }
224  
225  } // namespace SwRenderer