sw_framebuffer.cpp
1 // Copyright 2017 Citra Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #include <algorithm> 6 #include "common/color.h" 7 #include "common/logging/log.h" 8 #include "core/memory.h" 9 #include "video_core/pica/regs_external.h" 10 #include "video_core/pica/regs_framebuffer.h" 11 #include "video_core/pica_types.h" 12 #include "video_core/renderer_software/sw_framebuffer.h" 13 #include "video_core/utils.h" 14 15 namespace SwRenderer { 16 17 using Pica::f16; 18 using Pica::FramebufferRegs; 19 20 namespace { 21 22 /// Decode/Encode for shadow map format. It is similar to D24S8 format, 23 /// but the depth field is in big-endian. 24 const Common::Vec2<u32> DecodeD24S8Shadow(const u8* bytes) { 25 return {static_cast<u32>((bytes[0] << 16) | (bytes[1] << 8) | bytes[2]), bytes[3]}; 26 } 27 28 void EncodeD24X8Shadow(u32 depth, u8* bytes) { 29 bytes[2] = depth & 0xFF; 30 bytes[1] = (depth >> 8) & 0xFF; 31 bytes[0] = (depth >> 16) & 0xFF; 32 } 33 34 void EncodeX24S8Shadow(u8 stencil, u8* bytes) { 35 bytes[3] = stencil; 36 } 37 } // Anonymous namespace 38 39 Framebuffer::Framebuffer(Memory::MemorySystem& memory_, const Pica::FramebufferRegs& regs_) 40 : memory{memory_}, regs{regs_} {} 41 42 Framebuffer::~Framebuffer() = default; 43 44 void Framebuffer::Bind() { 45 PAddr addr = regs.framebuffer.GetColorBufferPhysicalAddress(); 46 if (color_addr != addr) [[unlikely]] { 47 color_addr = addr; 48 color_buffer = memory.GetPhysicalPointer(color_addr); 49 } 50 51 addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); 52 if (depth_addr != addr) [[unlikely]] { 53 depth_addr = addr; 54 depth_buffer = memory.GetPhysicalPointer(depth_addr); 55 } 56 } 57 58 void Framebuffer::DrawPixel(u32 x, u32 y, const Common::Vec4<u8>& color) const { 59 const auto& framebuffer = regs.framebuffer; 60 // Similarly to textures, the render framebuffer is laid out from bottom to top, too. 61 // NOTE: The framebuffer height register contains the actual FB height minus one. 62 y = framebuffer.height - y; 63 64 const u32 coarse_y = y & ~7; 65 const u32 bytes_per_pixel = 66 Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value())); 67 const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + 68 coarse_y * framebuffer.width * bytes_per_pixel; 69 u8* dst_pixel = color_buffer + dst_offset; 70 71 switch (framebuffer.color_format) { 72 case FramebufferRegs::ColorFormat::RGBA8: 73 Common::Color::EncodeRGBA8(color, dst_pixel); 74 break; 75 case FramebufferRegs::ColorFormat::RGB8: 76 Common::Color::EncodeRGB8(color, dst_pixel); 77 break; 78 case FramebufferRegs::ColorFormat::RGB5A1: 79 Common::Color::EncodeRGB5A1(color, dst_pixel); 80 break; 81 case FramebufferRegs::ColorFormat::RGB565: 82 Common::Color::EncodeRGB565(color, dst_pixel); 83 break; 84 case FramebufferRegs::ColorFormat::RGBA4: 85 Common::Color::EncodeRGBA4(color, dst_pixel); 86 break; 87 default: 88 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", 89 static_cast<u32>(framebuffer.color_format.Value())); 90 UNIMPLEMENTED(); 91 } 92 } 93 94 const Common::Vec4<u8> Framebuffer::GetPixel(u32 x, u32 y) const { 95 const auto& framebuffer = regs.framebuffer; 96 y = framebuffer.height - y; 97 98 const u32 coarse_y = y & ~7; 99 const u32 bytes_per_pixel = 100 Pica::BytesPerPixel(Pica::PixelFormat(framebuffer.color_format.Value())); 101 const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + 102 coarse_y * framebuffer.width * bytes_per_pixel; 103 const u8* src_pixel = color_buffer + src_offset; 104 105 switch (framebuffer.color_format) { 106 case FramebufferRegs::ColorFormat::RGBA8: 107 return Common::Color::DecodeRGBA8(src_pixel); 108 case FramebufferRegs::ColorFormat::RGB8: 109 return Common::Color::DecodeRGB8(src_pixel); 110 case FramebufferRegs::ColorFormat::RGB5A1: 111 return Common::Color::DecodeRGB5A1(src_pixel); 112 case FramebufferRegs::ColorFormat::RGB565: 113 return Common::Color::DecodeRGB565(src_pixel); 114 case FramebufferRegs::ColorFormat::RGBA4: 115 return Common::Color::DecodeRGBA4(src_pixel); 116 default: 117 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format {:x}", 118 static_cast<u32>(framebuffer.color_format.Value())); 119 UNIMPLEMENTED(); 120 } 121 122 return {0, 0, 0, 0}; 123 } 124 125 u32 Framebuffer::GetDepth(u32 x, u32 y) const { 126 const auto& framebuffer = regs.framebuffer; 127 y = framebuffer.height - y; 128 129 const u32 coarse_y = y & ~7; 130 const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); 131 const u32 stride = framebuffer.width * bytes_per_pixel; 132 133 const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; 134 const u8* src_pixel = depth_buffer + src_offset; 135 136 switch (framebuffer.depth_format) { 137 case FramebufferRegs::DepthFormat::D16: 138 return Common::Color::DecodeD16(src_pixel); 139 case FramebufferRegs::DepthFormat::D24: 140 return Common::Color::DecodeD24(src_pixel); 141 case FramebufferRegs::DepthFormat::D24S8: 142 return Common::Color::DecodeD24S8(src_pixel).x; 143 default: 144 LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", 145 static_cast<u32>(framebuffer.depth_format.Value())); 146 UNIMPLEMENTED(); 147 return 0; 148 } 149 } 150 151 u8 Framebuffer::GetStencil(u32 x, u32 y) const { 152 const auto& framebuffer = regs.framebuffer; 153 y = framebuffer.height - y; 154 155 const u32 coarse_y = y & ~7; 156 const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); 157 const u32 stride = framebuffer.width * bytes_per_pixel; 158 159 const u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; 160 const u8* src_pixel = depth_buffer + src_offset; 161 162 switch (framebuffer.depth_format) { 163 case FramebufferRegs::DepthFormat::D24S8: 164 return Common::Color::DecodeD24S8(src_pixel).y; 165 default: 166 LOG_WARNING( 167 HW_GPU, 168 "GetStencil called for function which doesn't have a stencil component (format {})", 169 static_cast<u32>(framebuffer.depth_format.Value())); 170 return 0; 171 } 172 } 173 174 void Framebuffer::SetDepth(u32 x, u32 y, u32 value) const { 175 const auto& framebuffer = regs.framebuffer; 176 y = framebuffer.height - y; 177 178 const u32 coarse_y = y & ~7; 179 const u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); 180 const u32 stride = framebuffer.width * bytes_per_pixel; 181 182 const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; 183 u8* dst_pixel = depth_buffer + dst_offset; 184 185 switch (framebuffer.depth_format) { 186 case FramebufferRegs::DepthFormat::D16: 187 Common::Color::EncodeD16(value, dst_pixel); 188 break; 189 case FramebufferRegs::DepthFormat::D24: 190 Common::Color::EncodeD24(value, dst_pixel); 191 break; 192 case FramebufferRegs::DepthFormat::D24S8: 193 Common::Color::EncodeD24X8(value, dst_pixel); 194 break; 195 default: 196 LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", 197 static_cast<u32>(framebuffer.depth_format.Value())); 198 UNIMPLEMENTED(); 199 break; 200 } 201 } 202 203 void Framebuffer::SetStencil(u32 x, u32 y, u8 value) const { 204 const auto& framebuffer = regs.framebuffer; 205 y = framebuffer.height - y; 206 207 const u32 coarse_y = y & ~7; 208 const u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format); 209 const u32 stride = framebuffer.width * bytes_per_pixel; 210 211 const u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; 212 u8* dst_pixel = depth_buffer + dst_offset; 213 214 switch (framebuffer.depth_format) { 215 case Pica::FramebufferRegs::DepthFormat::D16: 216 case Pica::FramebufferRegs::DepthFormat::D24: 217 // Nothing to do 218 break; 219 case Pica::FramebufferRegs::DepthFormat::D24S8: 220 Common::Color::EncodeX24S8(value, dst_pixel); 221 break; 222 default: 223 LOG_CRITICAL(HW_GPU, "Unimplemented depth format {}", 224 static_cast<u32>(framebuffer.depth_format.Value())); 225 UNIMPLEMENTED(); 226 break; 227 } 228 } 229 230 void Framebuffer::DrawShadowMapPixel(u32 x, u32 y, u32 depth, u8 stencil) const { 231 const auto& framebuffer = regs.framebuffer; 232 const auto& shadow = regs.shadow; 233 const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); 234 235 y = framebuffer.height - y; 236 237 const u32 coarse_y = y & ~7; 238 u32 bytes_per_pixel = 4; 239 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + 240 coarse_y * framebuffer.width * bytes_per_pixel; 241 u8* shadow_buffer = memory.GetPhysicalPointer(addr); 242 u8* dst_pixel = shadow_buffer + dst_offset; 243 244 const auto ref = DecodeD24S8Shadow(dst_pixel); 245 const u32 ref_z = ref.x; 246 const u32 ref_s = ref.y; 247 248 if (depth >= ref_z) { 249 return; 250 } 251 252 if (stencil == 0) { 253 EncodeD24X8Shadow(depth, dst_pixel); 254 } else { 255 const f16 constant = f16::FromRaw(shadow.constant); 256 const f16 linear = f16::FromRaw(shadow.linear); 257 const f16 x_ = f16::FromFloat32(static_cast<float>(depth) / ref_z); 258 const f16 stencil_new = f16::FromFloat32(stencil) / (constant + linear * x_); 259 stencil = static_cast<u8>(std::clamp(stencil_new.ToFloat32(), 0.0f, 255.0f)); 260 261 if (stencil < ref_s) { 262 EncodeX24S8Shadow(stencil, dst_pixel); 263 } 264 } 265 } 266 267 u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) { 268 switch (action) { 269 case FramebufferRegs::StencilAction::Keep: 270 return old_stencil; 271 case FramebufferRegs::StencilAction::Zero: 272 return 0; 273 case FramebufferRegs::StencilAction::Replace: 274 return ref; 275 case FramebufferRegs::StencilAction::Increment: 276 // Saturated increment 277 return std::min<u8>(old_stencil, 254) + 1; 278 case FramebufferRegs::StencilAction::Decrement: 279 // Saturated decrement 280 return std::max<u8>(old_stencil, 1) - 1; 281 case FramebufferRegs::StencilAction::Invert: 282 return ~old_stencil; 283 case FramebufferRegs::StencilAction::IncrementWrap: 284 return old_stencil + 1; 285 case FramebufferRegs::StencilAction::DecrementWrap: 286 return old_stencil - 1; 287 default: 288 LOG_CRITICAL(HW_GPU, "Unknown stencil action {:x}", static_cast<int>(action)); 289 UNIMPLEMENTED(); 290 return 0; 291 } 292 } 293 294 Common::Vec4<u8> EvaluateBlendEquation(const Common::Vec4<u8>& src, 295 const Common::Vec4<u8>& srcfactor, 296 const Common::Vec4<u8>& dest, 297 const Common::Vec4<u8>& destfactor, 298 FramebufferRegs::BlendEquation equation) { 299 Common::Vec4i result; 300 301 const auto src_result = (src * srcfactor).Cast<s32>(); 302 const auto dst_result = (dest * destfactor).Cast<s32>(); 303 304 switch (equation) { 305 case FramebufferRegs::BlendEquation::Add: 306 result = (src_result + dst_result) / 255; 307 break; 308 case FramebufferRegs::BlendEquation::Subtract: 309 result = (src_result - dst_result) / 255; 310 break; 311 case FramebufferRegs::BlendEquation::ReverseSubtract: 312 result = (dst_result - src_result) / 255; 313 break; 314 case FramebufferRegs::BlendEquation::Min: 315 result.r() = std::min(src_result.r(), dst_result.r()) / 255; 316 result.g() = std::min(src_result.g(), dst_result.g()) / 255; 317 result.b() = std::min(src_result.b(), dst_result.b()) / 255; 318 result.a() = std::min(src_result.a(), dst_result.a()) / 255; 319 break; 320 case FramebufferRegs::BlendEquation::Max: 321 result.r() = std::max(src_result.r(), dst_result.r()) / 255; 322 result.g() = std::max(src_result.g(), dst_result.g()) / 255; 323 result.b() = std::max(src_result.b(), dst_result.b()) / 255; 324 result.a() = std::max(src_result.a(), dst_result.a()) / 255; 325 break; 326 default: 327 LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation 0x{:x}", equation); 328 UNIMPLEMENTED(); 329 } 330 331 return Common::Vec4<u8>(std::clamp(result.r(), 0, 255), std::clamp(result.g(), 0, 255), 332 std::clamp(result.b(), 0, 255), std::clamp(result.a(), 0, 255)); 333 }; 334 335 u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) { 336 switch (op) { 337 case FramebufferRegs::LogicOp::Clear: 338 return 0; 339 case FramebufferRegs::LogicOp::And: 340 return src & dest; 341 case FramebufferRegs::LogicOp::AndReverse: 342 return src & ~dest; 343 case FramebufferRegs::LogicOp::Copy: 344 return src; 345 case FramebufferRegs::LogicOp::Set: 346 return 255; 347 case FramebufferRegs::LogicOp::CopyInverted: 348 return ~src; 349 case FramebufferRegs::LogicOp::NoOp: 350 return dest; 351 case FramebufferRegs::LogicOp::Invert: 352 return ~dest; 353 case FramebufferRegs::LogicOp::Nand: 354 return ~(src & dest); 355 case FramebufferRegs::LogicOp::Or: 356 return src | dest; 357 case FramebufferRegs::LogicOp::Nor: 358 return ~(src | dest); 359 case FramebufferRegs::LogicOp::Xor: 360 return src ^ dest; 361 case FramebufferRegs::LogicOp::Equiv: 362 return ~(src ^ dest); 363 case FramebufferRegs::LogicOp::AndInverted: 364 return ~src & dest; 365 case FramebufferRegs::LogicOp::OrReverse: 366 return src | ~dest; 367 case FramebufferRegs::LogicOp::OrInverted: 368 return ~src | dest; 369 } 370 UNREACHABLE(); 371 }; 372 373 } // namespace SwRenderer