/ src / video_core / renderer_software / sw_framebuffer.cpp
sw_framebuffer.cpp
  1  // Copyright 2017 Citra Emulator Project
  2  // Licensed under GPLv2 or any later version
  3  // Refer to the license.txt file included.
  4  
  5  #include <algorithm>
  6  #include "common/color.h"
  7  #include "common/logging/log.h"
  8  #include "core/memory.h"
  9  #include "video_core/pica/regs_external.h"
 10  #include "video_core/pica/regs_framebuffer.h"
 11  #include "video_core/pica_types.h"
 12  #include "video_core/renderer_software/sw_framebuffer.h"
 13  #include "video_core/utils.h"
 14  
 15  namespace SwRenderer {
 16  
 17  using Pica::f16;
 18  using Pica::FramebufferRegs;
 19  
 20  namespace {
 21  
 22  /// Decode/Encode for shadow map format. It is similar to D24S8 format,
 23  /// but the depth field is in big-endian.
 24  const Common::Vec2<u32> DecodeD24S8Shadow(const u8* bytes) {
 25      return {static_cast<u32>((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]), bytes[3]};
 26  }
 27  
 28  void EncodeD24X8Shadow(u32 depth, u8* bytes) {
 29      bytes[2] = depth & 0xFF;
 30      bytes[1] = (depth >> 8) & 0xFF;
 31      bytes[0] = (depth >> 16) & 0xFF;
 32  }
 33  
 34  void EncodeX24S8Shadow(u8 stencil, u8* bytes) {
 35      bytes[3] = stencil;
 36  }
 37  } // Anonymous namespace
 38  
 39  Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferRegs& regs_)
 40      : memory{memory_}, regs{regs_} {}
 41  
/// Defaulted destructor, defined out of line in this translation unit.
Framebuffer::~Framebuffer() = default;
 43  
 44  void Framebuffer::Bind() {
 45      PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress();
 46      if (color_addr != addr) [[unlikely]] {
 47          color_addr = addr;
 48          color_buffer = memory.GetPhysicalPointer(color_addr);
 49      }
 50  
 51      addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
 52      if (depth_addr != addr) [[unlikely]] {
 53          depth_addr = addr;
 54          depth_buffer = memory.GetPhysicalPointer(depth_addr);
 55      }
 56  }
 57  
 58  void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const {
 59      const auto& framebuffer = regs.framebuffer;
 60      // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
 61      // NOTE: The framebuffer height register contains the actual FB height minus one.
 62      y = framebuffer.height - y;
 63  
 64      const u32 coarse_y = y & ~7;
 65      const u32 bytes_per_pixel =
 66          Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value()));
 67      const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
 68                             coarse_y * framebuffer.width * bytes_per_pixel;
 69      u8* dst_pixel = color_buffer + dst_offset;
 70  
 71      switch (framebuffer.color_format) {
 72      case FramebufferRegs::ColorFormat::RGBA8:
 73          Common::Color::EncodeRGBA8(color, dst_pixel);
 74          break;
 75      case FramebufferRegs::ColorFormat::RGB8:
 76          Common::Color::EncodeRGB8(color, dst_pixel);
 77          break;
 78      case FramebufferRegs::ColorFormat::RGB5A1:
 79          Common::Color::EncodeRGB5A1(color, dst_pixel);
 80          break;
 81      case FramebufferRegs::ColorFormat::RGB565:
 82          Common::Color::EncodeRGB565(color, dst_pixel);
 83          break;
 84      case FramebufferRegs::ColorFormat::RGBA4:
 85          Common::Color::EncodeRGBA4(color, dst_pixel);
 86          break;
 87      default:
 88          LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}",
 89                       static_cast<u32>(framebuffer.color_format.Value()));
 90          UNIMPLEMENTED();
 91      }
 92  }
 93  
 94  const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const {
 95      const auto& framebuffer = regs.framebuffer;
 96      y = framebuffer.height - y;
 97  
 98      const u32 coarse_y = y & ~7;
 99      const u32 bytes_per_pixel =
100          Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value()));
101      const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
102                             coarse_y * framebuffer.width * bytes_per_pixel;
103      const u8* src_pixel = color_buffer + src_offset;
104  
105      switch (framebuffer.color_format) {
106      case FramebufferRegs::ColorFormat::RGBA8:
107          return Common::Color::DecodeRGBA8(src_pixel);
108      case FramebufferRegs::ColorFormat::RGB8:
109          return Common::Color::DecodeRGB8(src_pixel);
110      case FramebufferRegs::ColorFormat::RGB5A1:
111          return Common::Color::DecodeRGB5A1(src_pixel);
112      case FramebufferRegs::ColorFormat::RGB565:
113          return Common::Color::DecodeRGB565(src_pixel);
114      case FramebufferRegs::ColorFormat::RGBA4:
115          return Common::Color::DecodeRGBA4(src_pixel);
116      default:
117          LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}",
118                       static_cast<u32>(framebuffer.color_format.Value()));
119          UNIMPLEMENTED();
120      }
121  
122      return {0, 0, 0, 0};
123  }
124  
125  u32 Framebuffer::GetDepth(u32 x, u32 y) const {
126      const auto& framebuffer = regs.framebuffer;
127      y = framebuffer.height - y;
128  
129      const u32 coarse_y = y & ~7;
130      const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
131      const u32 stride = framebuffer.width * bytes_per_pixel;
132  
133      const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
134      const u8* src_pixel = depth_buffer + src_offset;
135  
136      switch (framebuffer.depth_format) {
137      case FramebufferRegs::DepthFormat::D16:
138          return Common::Color::DecodeD16(src_pixel);
139      case FramebufferRegs::DepthFormat::D24:
140          return Common::Color::DecodeD24(src_pixel);
141      case FramebufferRegs::DepthFormat::D24S8:
142          return Common::Color::DecodeD24S8(src_pixel).x;
143      default:
144          LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}",
145                       static_cast<u32>(framebuffer.depth_format.Value()));
146          UNIMPLEMENTED();
147          return 0;
148      }
149  }
150  
151  u8 Framebuffer::GetStencil(u32 x, u32 y) const {
152      const auto& framebuffer = regs.framebuffer;
153      y = framebuffer.height - y;
154  
155      const u32 coarse_y = y & ~7;
156      const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
157      const u32 stride = framebuffer.width * bytes_per_pixel;
158  
159      const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
160      const u8* src_pixel = depth_buffer + src_offset;
161  
162      switch (framebuffer.depth_format) {
163      case FramebufferRegs::DepthFormat::D24S8:
164          return Common::Color::DecodeD24S8(src_pixel).y;
165      default:
166          LOG_WARNING(
167              HW_GPU,
168              "GetStencil called for function which doesn't have a stencil component (format {})",
169              static_cast<u32>(framebuffer.depth_format.Value()));
170          return 0;
171      }
172  }
173  
174  void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const {
175      const auto& framebuffer = regs.framebuffer;
176      y = framebuffer.height - y;
177  
178      const u32 coarse_y = y & ~7;
179      const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
180      const u32 stride = framebuffer.width * bytes_per_pixel;
181  
182      const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
183      u8* dst_pixel = depth_buffer + dst_offset;
184  
185      switch (framebuffer.depth_format) {
186      case FramebufferRegs::DepthFormat::D16:
187          Common::Color::EncodeD16(value, dst_pixel);
188          break;
189      case FramebufferRegs::DepthFormat::D24:
190          Common::Color::EncodeD24(value, dst_pixel);
191          break;
192      case FramebufferRegs::DepthFormat::D24S8:
193          Common::Color::EncodeD24X8(value, dst_pixel);
194          break;
195      default:
196          LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}",
197                       static_cast<u32>(framebuffer.depth_format.Value()));
198          UNIMPLEMENTED();
199          break;
200      }
201  }
202  
203  void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const {
204      const auto& framebuffer = regs.framebuffer;
205      y = framebuffer.height - y;
206  
207      const u32 coarse_y = y & ~7;
208      const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
209      const u32 stride = framebuffer.width * bytes_per_pixel;
210  
211      const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
212      u8* dst_pixel = depth_buffer + dst_offset;
213  
214      switch (framebuffer.depth_format) {
215      case Pica::FramebufferRegs::DepthFormat::D16:
216      case Pica::FramebufferRegs::DepthFormat::D24:
217          // Nothing to do
218          break;
219      case Pica::FramebufferRegs::DepthFormat::D24S8:
220          Common::Color::EncodeX24S8(value, dst_pixel);
221          break;
222      default:
223          LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}",
224                       static_cast<u32>(framebuffer.depth_format.Value()));
225          UNIMPLEMENTED();
226          break;
227      }
228  }
229  
230  void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const {
231      const auto& framebuffer = regs.framebuffer;
232      const auto& shadow = regs.shadow;
233      const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
234  
235      y = framebuffer.height - y;
236  
237      const u32 coarse_y = y & ~7;
238      u32 bytes_per_pixel = 4;
239      u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
240                       coarse_y * framebuffer.width * bytes_per_pixel;
241      u8* shadow_buffer = memory.GetPhysicalPointer(addr);
242      u8* dst_pixel = shadow_buffer + dst_offset;
243  
244      const auto ref = DecodeD24S8Shadow(dst_pixel);
245      const u32 ref_z = ref.x;
246      const u32 ref_s = ref.y;
247  
248      if (depth >= ref_z) {
249          return;
250      }
251  
252      if (stencil == 0) {
253          EncodeD24X8Shadow(depth, dst_pixel);
254      } else {
255          const f16 constant = f16::FromRaw(shadow.constant);
256          const f16 linear = f16::FromRaw(shadow.linear);
257          const f16 x_ = f16::FromFloat32(static_cast<float>(depth) / ref_z);
258          const f16 stencil_new = f16::FromFloat32(stencil) / (constant + linear * x_);
259          stencil = static_cast<u8>(std::clamp(stencil_new.ToFloat32(), 0.0f, 255.0f));
260  
261          if (stencil < ref_s) {
262              EncodeX24S8Shadow(stencil, dst_pixel);
263          }
264      }
265  }
266  
267  u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) {
268      switch (action) {
269      case FramebufferRegs::StencilAction::Keep:
270          return old_stencil;
271      case FramebufferRegs::StencilAction::Zero:
272          return 0;
273      case FramebufferRegs::StencilAction::Replace:
274          return ref;
275      case FramebufferRegs::StencilAction::Increment:
276          // Saturated increment
277          return std::min<u8>(old_stencil, 254) + 1;
278      case FramebufferRegs::StencilAction::Decrement:
279          // Saturated decrement
280          return std::max<u8>(old_stencil, 1) - 1;
281      case FramebufferRegs::StencilAction::Invert:
282          return ~old_stencil;
283      case FramebufferRegs::StencilAction::IncrementWrap:
284          return old_stencil + 1;
285      case FramebufferRegs::StencilAction::DecrementWrap:
286          return old_stencil - 1;
287      default:
288          LOG_CRITICAL(HW_GPU, "Unknown stencil action {:x}", static_cast<int>(action));
289          UNIMPLEMENTED();
290          return 0;
291      }
292  }
293  
294  Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src,
295                                         const Common::Vec4<u8>& srcfactor,
296                                         const Common::Vec4<u8>& dest,
297                                         const Common::Vec4<u8>& destfactor,
298                                         FramebufferRegs::BlendEquation equation) {
299      Common::Vec4i result;
300  
301      const auto src_result = (src * srcfactor).Cast<s32>();
302      const auto dst_result = (dest * destfactor).Cast<s32>();
303  
304      switch (equation) {
305      case FramebufferRegs::BlendEquation::Add:
306          result = (src_result + dst_result) / 255;
307          break;
308      case FramebufferRegs::BlendEquation::Subtract:
309          result = (src_result - dst_result) / 255;
310          break;
311      case FramebufferRegs::BlendEquation::ReverseSubtract:
312          result = (dst_result - src_result) / 255;
313          break;
314      case FramebufferRegs::BlendEquation::Min:
315          result.r() = std::min(src_result.r(), dst_result.r()) / 255;
316          result.g() = std::min(src_result.g(), dst_result.g()) / 255;
317          result.b() = std::min(src_result.b(), dst_result.b()) / 255;
318          result.a() = std::min(src_result.a(), dst_result.a()) / 255;
319          break;
320      case FramebufferRegs::BlendEquation::Max:
321          result.r() = std::max(src_result.r(), dst_result.r()) / 255;
322          result.g() = std::max(src_result.g(), dst_result.g()) / 255;
323          result.b() = std::max(src_result.b(), dst_result.b()) / 255;
324          result.a() = std::max(src_result.a(), dst_result.a()) / 255;
325          break;
326      default:
327          LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation);
328          UNIMPLEMENTED();
329      }
330  
331      return Common::Vec4<u8>(std::clamp(result.r(), 0, 255), std::clamp(result.g(), 0, 255),
332                              std::clamp(result.b(), 0, 255), std::clamp(result.a(), 0, 255));
333  };
334  
335  u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
336      switch (op) {
337      case FramebufferRegs::LogicOp::Clear:
338          return 0;
339      case FramebufferRegs::LogicOp::And:
340          return src & dest;
341      case FramebufferRegs::LogicOp::AndReverse:
342          return src & ~dest;
343      case FramebufferRegs::LogicOp::Copy:
344          return src;
345      case FramebufferRegs::LogicOp::Set:
346          return 255;
347      case FramebufferRegs::LogicOp::CopyInverted:
348          return ~src;
349      case FramebufferRegs::LogicOp::NoOp:
350          return dest;
351      case FramebufferRegs::LogicOp::Invert:
352          return ~dest;
353      case FramebufferRegs::LogicOp::Nand:
354          return ~(src & dest);
355      case FramebufferRegs::LogicOp::Or:
356          return src | dest;
357      case FramebufferRegs::LogicOp::Nor:
358          return ~(src | dest);
359      case FramebufferRegs::LogicOp::Xor:
360          return src ^ dest;
361      case FramebufferRegs::LogicOp::Equiv:
362          return ~(src ^ dest);
363      case FramebufferRegs::LogicOp::AndInverted:
364          return ~src & dest;
365      case FramebufferRegs::LogicOp::OrReverse:
366          return src | ~dest;
367      case FramebufferRegs::LogicOp::OrInverted:
368          return ~src | dest;
369      }
370      UNREACHABLE();
371  };
372  
373  } // namespace SwRenderer