vk_rasterizer.cpp
1 // Copyright 2023 Citra Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #include "common/alignment.h" 6 #include "common/literals.h" 7 #include "common/logging/log.h" 8 #include "common/math_util.h" 9 #include "common/microprofile.h" 10 #include "common/settings.h" 11 #include "core/memory.h" 12 #include "video_core/pica/pica_core.h" 13 #include "video_core/renderer_vulkan/renderer_vulkan.h" 14 #include "video_core/renderer_vulkan/vk_instance.h" 15 #include "video_core/renderer_vulkan/vk_rasterizer.h" 16 #include "video_core/renderer_vulkan/vk_scheduler.h" 17 #include "video_core/texture/texture_decode.h" 18 19 namespace Vulkan { 20 21 namespace { 22 23 MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); 24 MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128)); 25 MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); 26 27 using TriangleTopology = Pica::PipelineRegs::TriangleTopology; 28 using VideoCore::SurfaceType; 29 30 using namespace Common::Literals; 31 using namespace Pica::Shader::Generator; 32 33 constexpr u64 STREAM_BUFFER_SIZE = 64_MiB; 34 constexpr u64 UNIFORM_BUFFER_SIZE = 4_MiB; 35 constexpr u64 TEXTURE_BUFFER_SIZE = 2_MiB; 36 37 constexpr vk::BufferUsageFlags BUFFER_USAGE = 38 vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer; 39 40 struct DrawParams { 41 u32 vertex_count; 42 s32 vertex_offset; 43 u32 binding_count; 44 std::array<u32, 16> bindings; 45 bool is_indexed; 46 }; 47 48 [[nodiscard]] u64 TextureBufferSize(const Instance& instance) { 49 // Use the smallest texel size from the texel views 50 // which corresponds to eR32G32Sfloat 51 const u64 max_size = instance.MaxTexelBufferElements() * 8; 52 return std::min(max_size, TEXTURE_BUFFER_SIZE); 53 } 54 55 } // Anonymous namespace 56 57 RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, 
Pica::PicaCore& pica, 58 VideoCore::CustomTexManager& custom_tex_manager, 59 VideoCore::RendererBase& renderer, 60 Frontend::EmuWindow& emu_window, const Instance& instance, 61 Scheduler& scheduler, DescriptorPool& pool, 62 RenderpassCache& renderpass_cache, u32 image_count) 63 : RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler}, 64 renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache, 65 pool}, 66 runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(), 67 image_count}, 68 res_cache{memory, custom_tex_manager, runtime, regs, renderer}, 69 stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, 70 uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer, 71 UNIFORM_BUFFER_SIZE}, 72 texture_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer, 73 TextureBufferSize(instance)}, 74 texture_lf_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer, 75 TextureBufferSize(instance)}, 76 async_shaders{Settings::values.async_shader_compilation.GetValue()} { 77 78 vertex_buffers.fill(stream_buffer.Handle()); 79 80 uniform_buffer_alignment = instance.UniformMinAlignment(); 81 uniform_size_aligned_vs_pica = 82 Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment); 83 uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment); 84 uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment); 85 86 // Define vertex layout for software shaders 87 MakeSoftwareVertexLayout(); 88 pipeline_info.vertex_layout = software_layout; 89 90 const vk::Device device = instance.GetDevice(); 91 texture_lf_view = device.createBufferViewUnique({ 92 .buffer = texture_lf_buffer.Handle(), 93 .format = vk::Format::eR32G32Sfloat, 94 .offset = 0, 95 .range = VK_WHOLE_SIZE, 96 }); 97 texture_rg_view = device.createBufferViewUnique({ 98 .buffer = 
texture_buffer.Handle(), 99 .format = vk::Format::eR32G32Sfloat, 100 .offset = 0, 101 .range = VK_WHOLE_SIZE, 102 }); 103 texture_rgba_view = device.createBufferViewUnique({ 104 .buffer = texture_buffer.Handle(), 105 .format = vk::Format::eR32G32B32A32Sfloat, 106 .offset = 0, 107 .range = VK_WHOLE_SIZE, 108 }); 109 110 // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize 111 // all descriptor sets even the ones we don't use. 112 pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData)); 113 pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData)); 114 pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData)); 115 pipeline_cache.BindTexelBuffer(3, *texture_lf_view); 116 pipeline_cache.BindTexelBuffer(4, *texture_rg_view); 117 pipeline_cache.BindTexelBuffer(5, *texture_rgba_view); 118 119 Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); 120 Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); 121 for (u32 i = 0; i < 3; i++) { 122 pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle()); 123 } 124 125 for (u32 i = 0; i < 7; i++) { 126 pipeline_cache.BindStorageImage(i, null_surface.StorageView()); 127 } 128 129 SyncEntireState(); 130 } 131 132 RasterizerVulkan::~RasterizerVulkan() = default; 133 134 void RasterizerVulkan::TickFrame() { 135 res_cache.TickFrame(); 136 } 137 138 void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, 139 const VideoCore::DiskResourceLoadCallback& callback) { 140 pipeline_cache.LoadDiskCache(); 141 } 142 143 void RasterizerVulkan::SyncFixedState() { 144 SyncCullMode(); 145 SyncBlendEnabled(); 146 SyncBlendFuncs(); 147 SyncBlendColor(); 148 SyncLogicOp(); 149 SyncStencilTest(); 150 SyncDepthTest(); 151 SyncColorWriteMask(); 152 SyncStencilWriteMask(); 153 SyncDepthWriteMask(); 154 } 155 156 void RasterizerVulkan::SetupVertexArray() { 157 const auto 
[vs_input_index_min, vs_input_index_max, vs_input_size] = vertex_info; 158 auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16); 159 160 /** 161 * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU 162 * how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base 163 * address containing the vertex array data. The data for each attribute loader (i) can be found 164 * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought 165 * as something analogous to Vulkan bindings. The user can store attributes in separate loaders 166 * or interleave them in the same loader. 167 **/ 168 const auto& vertex_attributes = regs.pipeline.vertex_attributes; 169 const PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE 170 const u32 stride_alignment = instance.GetMinVertexStrideAlignment(); 171 172 VertexLayout& layout = pipeline_info.vertex_layout; 173 layout.binding_count = 0; 174 layout.attribute_count = 16; 175 enable_attributes.fill(false); 176 177 u32 buffer_offset = 0; 178 for (const auto& loader : vertex_attributes.attribute_loaders) { 179 if (loader.component_count == 0 || loader.byte_count == 0) { 180 continue; 181 } 182 183 // Analyze the attribute loader by checking which attributes it provides 184 u32 offset = 0; 185 for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) { 186 const u32 attribute_index = loader.GetComponent(comp); 187 if (attribute_index >= 12) { 188 // Attribute ids 12, to 15 signify 4, 8, 12 and 16-byte paddings respectively. 
189 offset = Common::AlignUp(offset, 4); 190 offset += (attribute_index - 11) * 4; 191 continue; 192 } 193 194 const u32 size = vertex_attributes.GetNumElements(attribute_index); 195 if (size == 0) { 196 continue; 197 } 198 199 offset = 200 Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); 201 202 const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); 203 const auto format = vertex_attributes.GetFormat(attribute_index); 204 205 VertexAttribute& attribute = layout.attributes[input_reg]; 206 attribute.binding.Assign(layout.binding_count); 207 attribute.location.Assign(input_reg); 208 attribute.offset.Assign(offset); 209 attribute.type.Assign(format); 210 attribute.size.Assign(size); 211 212 enable_attributes[input_reg] = true; 213 offset += vertex_attributes.GetStride(attribute_index); 214 } 215 216 const PAddr data_addr = 217 base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); 218 const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; 219 u32 data_size = loader.byte_count * vertex_num; 220 res_cache.FlushRegion(data_addr, data_size); 221 222 const MemoryRef src_ref = memory.GetPhysicalRef(data_addr); 223 if (src_ref.GetSize() < data_size) { 224 LOG_ERROR(Render_Vulkan, 225 "Vertex buffer size {} exceeds available space {} at address {:#016X}", 226 data_size, src_ref.GetSize(), data_addr); 227 } 228 229 const u8* src_ptr = src_ref.GetPtr(); 230 u8* dst_ptr = array_ptr + buffer_offset; 231 232 // Align stride up if required by Vulkan implementation. 
233 const u32 aligned_stride = 234 Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment); 235 if (aligned_stride == loader.byte_count) { 236 std::memcpy(dst_ptr, src_ptr, data_size); 237 } else { 238 for (std::size_t vertex = 0; vertex < vertex_num; vertex++) { 239 std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count, 240 loader.byte_count); 241 } 242 } 243 244 // Create the binding associated with this loader 245 VertexBinding& binding = layout.bindings[layout.binding_count]; 246 binding.binding.Assign(layout.binding_count); 247 binding.fixed.Assign(0); 248 binding.stride.Assign(aligned_stride); 249 250 // Keep track of the binding offsets so we can bind the vertex buffer later 251 binding_offsets[layout.binding_count++] = static_cast<u32>(array_offset + buffer_offset); 252 buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4); 253 } 254 255 stream_buffer.Commit(buffer_offset); 256 257 // Assign the rest of the attributes to the last binding 258 SetupFixedAttribs(); 259 } 260 261 void RasterizerVulkan::SetupFixedAttribs() { 262 const auto& vertex_attributes = regs.pipeline.vertex_attributes; 263 VertexLayout& layout = pipeline_info.vertex_layout; 264 265 auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0); 266 binding_offsets[layout.binding_count] = static_cast<u32>(fixed_offset); 267 268 // Reserve the last binding for fixed and default attributes 269 // Place the default attrib at offset zero for easy access 270 static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f}; 271 std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f)); 272 273 // Find all fixed attributes and assign them to the last binding 274 u32 offset = sizeof(Common::Vec4f); 275 for (std::size_t i = 0; i < 16; i++) { 276 if (vertex_attributes.IsDefaultAttribute(i)) { 277 const u32 reg = regs.vs.GetRegisterForAttribute(i); 278 if (!enable_attributes[reg]) { 279 const auto& attr = 
pica.input_default_attributes[i]; 280 const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), 281 attr.w.ToFloat32()}; 282 283 const u32 data_size = sizeof(float) * static_cast<u32>(data.size()); 284 std::memcpy(fixed_ptr + offset, data.data(), data_size); 285 286 VertexAttribute& attribute = layout.attributes[reg]; 287 attribute.binding.Assign(layout.binding_count); 288 attribute.location.Assign(reg); 289 attribute.offset.Assign(offset); 290 attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); 291 attribute.size.Assign(4); 292 293 offset += data_size; 294 enable_attributes[reg] = true; 295 } 296 } 297 } 298 299 // Loop one more time to find unused attributes and assign them to the default one 300 // If the attribute is just disabled, shove the default attribute to avoid 301 // errors if the shader ever decides to use it. 302 for (u32 i = 0; i < 16; i++) { 303 if (!enable_attributes[i]) { 304 VertexAttribute& attribute = layout.attributes[i]; 305 attribute.binding.Assign(layout.binding_count); 306 attribute.location.Assign(i); 307 attribute.offset.Assign(0); 308 attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); 309 attribute.size.Assign(4); 310 } 311 } 312 313 // Define the fixed+default binding 314 VertexBinding& binding = layout.bindings[layout.binding_count]; 315 binding.binding.Assign(layout.binding_count++); 316 binding.fixed.Assign(1); 317 binding.stride.Assign(offset); 318 319 stream_buffer.Commit(offset); 320 } 321 322 bool RasterizerVulkan::SetupVertexShader() { 323 MICROPROFILE_SCOPE(Vulkan_VS); 324 return pipeline_cache.UseProgrammableVertexShader(regs, pica.vs_setup, 325 pipeline_info.vertex_layout); 326 } 327 328 bool RasterizerVulkan::SetupGeometryShader() { 329 MICROPROFILE_SCOPE(Vulkan_GS); 330 331 if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { 332 LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader"); 333 return false; 334 } 335 336 // Enable 
the quaternion fix-up geometry-shader only if we are actually doing per-fragment 337 // lighting and care about proper quaternions. Otherwise just use standard vertex+fragment 338 // shaders. We also don't need a geometry shader if the barycentric extension is supported. 339 if (regs.lighting.disable || instance.IsFragmentShaderBarycentricSupported()) { 340 pipeline_cache.UseTrivialGeometryShader(); 341 return true; 342 } 343 344 return pipeline_cache.UseFixedGeometryShader(regs); 345 } 346 347 bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { 348 if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { 349 if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) { 350 return false; 351 } 352 if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { 353 return false; 354 } 355 } 356 357 pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); 358 if (regs.pipeline.triangle_topology == TriangleTopology::Fan && 359 !instance.IsTriangleFanSupported()) { 360 LOG_DEBUG(Render_Vulkan, 361 "Skipping accelerated draw with unsupported triangle fan topology"); 362 return false; 363 } 364 365 // Vertex data setup might involve scheduler flushes so perform it 366 // early to avoid invalidating our state in the middle of the draw. 
367 vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment()); 368 SetupVertexArray(); 369 370 if (!SetupVertexShader()) { 371 return false; 372 } 373 if (!SetupGeometryShader()) { 374 return false; 375 } 376 377 return Draw(true, is_indexed); 378 } 379 380 bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { 381 if (is_indexed) { 382 SetupIndexArray(); 383 } 384 385 const bool wait_built = !async_shaders || regs.pipeline.num_vertices <= 6; 386 if (!pipeline_cache.BindPipeline(pipeline_info, wait_built)) { 387 return true; 388 } 389 390 const DrawParams params = { 391 .vertex_count = regs.pipeline.num_vertices, 392 .vertex_offset = -static_cast<s32>(vertex_info.vs_input_index_min), 393 .binding_count = pipeline_info.vertex_layout.binding_count, 394 .bindings = binding_offsets, 395 .is_indexed = is_indexed, 396 }; 397 398 scheduler.Record([this, params](vk::CommandBuffer cmdbuf) { 399 std::array<vk::DeviceSize, 16> offsets; 400 std::transform(params.bindings.begin(), params.bindings.end(), offsets.begin(), 401 [](u32 offset) { return static_cast<vk::DeviceSize>(offset); }); 402 cmdbuf.bindVertexBuffers(0, params.binding_count, vertex_buffers.data(), offsets.data()); 403 if (params.is_indexed) { 404 cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0); 405 } else { 406 cmdbuf.draw(params.vertex_count, 1, 0, 0); 407 } 408 }); 409 410 return true; 411 } 412 413 void RasterizerVulkan::SetupIndexArray() { 414 const bool index_u8 = regs.pipeline.index_array.format == 0; 415 const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported(); 416 const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2); 417 const vk::IndexType index_type = native_u8 ? 
vk::IndexType::eUint8EXT : vk::IndexType::eUint16; 418 419 const u8* index_data = 420 memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + 421 regs.pipeline.index_array.offset); 422 423 auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2); 424 425 if (index_u8 && !native_u8) { 426 u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr); 427 for (u32 i = 0; i < regs.pipeline.num_vertices; i++) { 428 index_ptr_u16[i] = index_data[i]; 429 } 430 } else { 431 std::memcpy(index_ptr, index_data, index_buffer_size); 432 } 433 434 stream_buffer.Commit(index_buffer_size); 435 436 scheduler.Record( 437 [this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer cmdbuf) { 438 cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type); 439 }); 440 } 441 442 void RasterizerVulkan::DrawTriangles() { 443 if (vertex_batch.empty()) { 444 return; 445 } 446 447 pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List); 448 pipeline_info.vertex_layout = software_layout; 449 450 pipeline_cache.UseTrivialVertexShader(); 451 pipeline_cache.UseTrivialGeometryShader(); 452 453 Draw(false, false); 454 } 455 456 bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { 457 MICROPROFILE_SCOPE(Vulkan_Drawing); 458 459 const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); 460 const bool has_stencil = regs.framebuffer.HasStencil(); 461 462 const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask; 463 const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled(); 464 const bool using_color_fb = 465 regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; 466 const bool using_depth_fb = 467 !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && 468 (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || 469 (has_stencil && 
pipeline_info.depth_stencil.stencil_test_enable)); 470 471 const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); 472 const Framebuffer* framebuffer = fb_helper.Framebuffer(); 473 if (!framebuffer->Handle()) { 474 return true; 475 } 476 477 pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color); 478 pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth); 479 480 if (shadow_rendering) { 481 pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color)); 482 } else { 483 Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); 484 pipeline_cache.BindStorageImage(6, null_surface.StorageView()); 485 } 486 487 // Update scissor uniforms 488 const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); 489 if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 || 490 fs_uniform_block_data.data.scissor_x2 != scissor_x2 || 491 fs_uniform_block_data.data.scissor_y1 != scissor_y1 || 492 fs_uniform_block_data.data.scissor_y2 != scissor_y2) { 493 494 fs_uniform_block_data.data.scissor_x1 = scissor_x1; 495 fs_uniform_block_data.data.scissor_x2 = scissor_x2; 496 fs_uniform_block_data.data.scissor_y1 = scissor_y1; 497 fs_uniform_block_data.data.scissor_y2 = scissor_y2; 498 fs_uniform_block_data.dirty = true; 499 } 500 501 // Sync and bind the texture surfaces 502 SyncTextureUnits(framebuffer); 503 504 // Sync and bind the shader 505 if (shader_dirty) { 506 pipeline_cache.UseFragmentShader(regs, user_config); 507 shader_dirty = false; 508 } 509 510 // Sync the LUTs within the texture buffer 511 SyncAndUploadLUTs(); 512 SyncAndUploadLUTsLF(); 513 UploadUniforms(accelerate); 514 515 // Begin rendering 516 const auto draw_rect = fb_helper.DrawRect(); 517 renderpass_cache.BeginRendering(framebuffer, draw_rect); 518 519 // Configure viewport and scissor 520 const auto viewport = fb_helper.Viewport(); 521 pipeline_info.dynamic.viewport = Common::Rectangle<s32>{ 
522 viewport.x, 523 viewport.y, 524 viewport.x + viewport.width, 525 viewport.y + viewport.height, 526 }; 527 pipeline_info.dynamic.scissor = draw_rect; 528 529 // Draw the vertex batch 530 bool succeeded = true; 531 if (accelerate) { 532 succeeded = AccelerateDrawBatchInternal(is_indexed); 533 } else { 534 pipeline_cache.BindPipeline(pipeline_info, true); 535 536 const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex); 537 const u32 vertex_count = static_cast<u32>(vertex_batch.size()); 538 const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex)); 539 540 std::memcpy(buffer, vertex_batch.data(), vertex_size); 541 stream_buffer.Commit(vertex_size); 542 543 scheduler.Record([this, offset = offset, vertex_count](vk::CommandBuffer cmdbuf) { 544 cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset); 545 cmdbuf.draw(vertex_count, 1, 0, 0); 546 }); 547 } 548 549 vertex_batch.clear(); 550 return succeeded; 551 } 552 553 void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { 554 using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; 555 556 const auto pica_textures = regs.texturing.GetTextures(); 557 for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { 558 const auto& texture = pica_textures[texture_index]; 559 560 // If the texture unit is disabled bind a null surface to it 561 if (!texture.enabled) { 562 const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); 563 const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); 564 pipeline_cache.BindTexture(texture_index, null_surface.ImageView(), 565 null_sampler.Handle()); 566 continue; 567 } 568 569 // Handle special tex0 configurations 570 if (texture_index == 0) { 571 switch (texture.config.type.Value()) { 572 case TextureType::Shadow2D: { 573 Surface& surface = res_cache.GetTextureSurface(texture); 574 surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; 575 
pipeline_cache.BindStorageImage(0, surface.StorageView()); 576 continue; 577 } 578 case TextureType::ShadowCube: { 579 BindShadowCube(texture); 580 continue; 581 } 582 case TextureType::TextureCube: { 583 BindTextureCube(texture); 584 continue; 585 } 586 default: 587 UnbindSpecial(); 588 break; 589 } 590 } 591 592 // Bind the texture provided by the rasterizer cache 593 Surface& surface = res_cache.GetTextureSurface(texture); 594 Sampler& sampler = res_cache.GetSampler(texture.config); 595 if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) { 596 pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle()); 597 } 598 } 599 } 600 601 void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) { 602 using CubeFace = Pica::TexturingRegs::CubeFace; 603 auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); 604 constexpr std::array faces = { 605 CubeFace::PositiveX, CubeFace::NegativeX, CubeFace::PositiveY, 606 CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ, 607 }; 608 609 for (CubeFace face : faces) { 610 const u32 binding = static_cast<u32>(face); 611 info.physical_address = regs.texturing.GetCubePhysicalAddress(face); 612 613 const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info); 614 Surface& surface = res_cache.GetSurface(surface_id); 615 surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; 616 pipeline_cache.BindStorageImage(binding, surface.StorageView()); 617 } 618 } 619 620 void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { 621 using CubeFace = Pica::TexturingRegs::CubeFace; 622 const VideoCore::TextureCubeConfig config = { 623 .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), 624 .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), 625 .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), 626 .ny = 
regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), 627 .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), 628 .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), 629 .width = texture.config.width, 630 .levels = texture.config.lod.max_level + 1, 631 .format = texture.format, 632 }; 633 634 Surface& surface = res_cache.GetTextureCube(config); 635 Sampler& sampler = res_cache.GetSampler(texture.config); 636 pipeline_cache.BindTexture(0, surface.ImageView(), sampler.Handle()); 637 } 638 639 bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, 640 Surface& surface, Sampler& sampler) { 641 const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); 642 const bool is_feedback_loop = color_view == surface.ImageView(); 643 if (!is_feedback_loop) { 644 return false; 645 } 646 647 // Make a temporary copy of the framebuffer to sample from 648 pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle()); 649 return true; 650 } 651 652 void RasterizerVulkan::UnbindSpecial() { 653 Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); 654 for (u32 i = 0; i < 6; i++) { 655 pipeline_cache.BindStorageImage(i, null_surface.StorageView()); 656 } 657 } 658 659 void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { 660 switch (id) { 661 // Culling 662 case PICA_REG_INDEX(rasterizer.cull_mode): 663 SyncCullMode(); 664 break; 665 666 // Blending 667 case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): 668 SyncBlendEnabled(); 669 // Update since logic op emulation depends on alpha blend enable. 
670 SyncLogicOp(); 671 SyncColorWriteMask(); 672 break; 673 case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): 674 SyncBlendFuncs(); 675 break; 676 case PICA_REG_INDEX(framebuffer.output_merger.blend_const): 677 SyncBlendColor(); 678 break; 679 680 // Sync VK stencil test + stencil write mask 681 // (Pica stencil test function register also contains a stencil write mask) 682 case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): 683 SyncStencilTest(); 684 SyncStencilWriteMask(); 685 break; 686 case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): 687 case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): 688 SyncStencilTest(); 689 break; 690 691 // Sync VK depth test + depth and color write mask 692 // (Pica depth test function register also contains a depth and color write mask) 693 case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): 694 SyncDepthTest(); 695 SyncDepthWriteMask(); 696 SyncColorWriteMask(); 697 break; 698 699 // Sync VK depth and stencil write mask 700 // (This is a dedicated combined depth / stencil write-enable register) 701 case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): 702 SyncDepthWriteMask(); 703 SyncStencilWriteMask(); 704 break; 705 706 // Sync VK color write mask 707 // (This is a dedicated color write-enable register) 708 case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): 709 SyncColorWriteMask(); 710 break; 711 712 // Logic op 713 case PICA_REG_INDEX(framebuffer.output_merger.logic_op): 714 SyncLogicOp(); 715 // Update since color write mask is used to emulate no-op. 
716 SyncColorWriteMask(); 717 break; 718 } 719 } 720 721 void RasterizerVulkan::FlushAll() { 722 res_cache.FlushAll(); 723 } 724 725 void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { 726 res_cache.FlushRegion(addr, size); 727 } 728 729 void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { 730 res_cache.InvalidateRegion(addr, size); 731 } 732 733 void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { 734 res_cache.FlushRegion(addr, size); 735 res_cache.InvalidateRegion(addr, size); 736 } 737 738 void RasterizerVulkan::ClearAll(bool flush) { 739 res_cache.ClearAll(flush); 740 } 741 742 bool RasterizerVulkan::AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) { 743 return res_cache.AccelerateDisplayTransfer(config); 744 } 745 746 bool RasterizerVulkan::AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) { 747 return res_cache.AccelerateTextureCopy(config); 748 } 749 750 bool RasterizerVulkan::AccelerateFill(const Pica::MemoryFillConfig& config) { 751 return res_cache.AccelerateFill(config); 752 } 753 754 bool RasterizerVulkan::AccelerateDisplay(const Pica::FramebufferConfig& config, 755 PAddr framebuffer_addr, u32 pixel_stride, 756 ScreenInfo& screen_info) { 757 if (framebuffer_addr == 0) [[unlikely]] { 758 return false; 759 } 760 761 VideoCore::SurfaceParams src_params; 762 src_params.addr = framebuffer_addr; 763 src_params.width = std::min(config.width.Value(), pixel_stride); 764 src_params.height = config.height; 765 src_params.stride = pixel_stride; 766 src_params.is_tiled = false; 767 src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); 768 src_params.UpdateParams(); 769 770 const auto [src_surface_id, src_rect] = 771 res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); 772 773 if (!src_surface_id) { 774 return false; 775 } 776 777 const Surface& src_surface = res_cache.GetSurface(src_surface_id); 778 const u32 scaled_width = 
src_surface.GetScaledWidth();
    const u32 scaled_height = src_surface.GetScaledHeight();

    // Normalize the source rectangle into [0, 1] texture coordinates.
    screen_info.texcoords = Common::Rectangle<f32>(
        (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
        (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);

    screen_info.image_view = src_surface.ImageView();

    return true;
}

/// Builds the vertex layout used for software-processed vertex data: a single
/// vertex binding of HardwareVertex with eight FLOAT attributes packed back-to-back.
void RasterizerVulkan::MakeSoftwareVertexLayout() {
    // Component count of each HardwareVertex attribute, in shader location order.
    constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};

    software_layout = VertexLayout{
        .binding_count = 1,
        .attribute_count = 8,
    };

    // Single non-fixed binding whose stride is the full interleaved vertex.
    for (u32 i = 0; i < software_layout.binding_count; i++) {
        VertexBinding& binding = software_layout.bindings[i];
        binding.binding.Assign(i);
        binding.fixed.Assign(0);
        binding.stride.Assign(sizeof(HardwareVertex));
    }

    // Assign sequential locations and byte offsets; all attributes use binding 0.
    u32 offset = 0;
    for (u32 i = 0; i < 8; i++) {
        VertexAttribute& attribute = software_layout.attributes[i];
        attribute.binding.Assign(0);
        attribute.location.Assign(i);
        attribute.offset.Assign(offset);
        attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
        attribute.size.Assign(sizes[i]);
        offset += sizes[i] * sizeof(float);
    }
}

/// Mirrors the PICA cull mode register into the cached pipeline state.
void RasterizerVulkan::SyncCullMode() {
    pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
}

/// Mirrors the output-merger alpha blend enable bit into the cached pipeline state.
void RasterizerVulkan::SyncBlendEnabled() {
    pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable;
}

/// Copies the blend equations and source/destination factors (RGB and alpha
/// separately) from the output-merger registers into the cached pipeline state.
void RasterizerVulkan::SyncBlendFuncs() {
    pipeline_info.blending.color_blend_eq.Assign(
        regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
    pipeline_info.blending.alpha_blend_eq.Assign(
        regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
    pipeline_info.blending.src_color_blend_factor.Assign(
        regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
    pipeline_info.blending.dst_color_blend_factor.Assign(
        regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
    pipeline_info.blending.src_alpha_blend_factor.Assign(
        regs.framebuffer.output_merger.alpha_blending.factor_source_a);
    pipeline_info.blending.dst_alpha_blend_factor.Assign(
        regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
}

/// Mirrors the constant blend color register into the dynamic pipeline state.
void RasterizerVulkan::SyncBlendColor() {
    pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw;
}

/// Syncs the logic op register. On drivers without logic op support the
/// operation is emulated in the fragment shader, so the shader is marked dirty.
void RasterizerVulkan::SyncLogicOp() {
    if (instance.NeedsLogicOpEmulation()) {
        // We need this in the fragment shader to emulate logic operations
        shader_dirty = true;
    }

    pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op;

    // Logic ops only apply when alpha blending is disabled; emulation is only
    // active in that case.
    const bool is_logic_op_emulated =
        instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
    const bool is_logic_op_noop =
        regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
    if (is_logic_op_emulated && is_logic_op_noop) {
        // Color output is disabled by logic operation. We use color write mask to skip
        // color but allow depth write.
        pipeline_info.blending.color_write_mask = 0;
    }
}

/// Syncs the RGBA color write mask. Writes are only honored while the
/// framebuffer allows color writes; mask bits come from depth_color_mask[11:8].
void RasterizerVulkan::SyncColorWriteMask() {
    const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0
                               ? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF
                               : 0;

    // Keep in sync with the equivalent check in SyncLogicOp.
    const bool is_logic_op_emulated =
        instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
    const bool is_logic_op_noop =
        regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
    if (is_logic_op_emulated && is_logic_op_noop) {
        // Color output is disabled by logic operation. We use color write mask to skip
        // color but allow depth write. Return early to avoid overwriting this.
        return;
    }

    pipeline_info.blending.color_write_mask = color_mask;
}

/// Syncs the stencil write mask, forcing it to zero when the framebuffer
/// disallows depth/stencil writes.
void RasterizerVulkan::SyncStencilWriteMask() {
    pipeline_info.dynamic.stencil_write_mask =
        (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
            ? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask)
            : 0;
}

/// Syncs the depth write enable, gated on the framebuffer allowing
/// depth/stencil writes.
void RasterizerVulkan::SyncDepthWriteMask() {
    const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
                               regs.framebuffer.output_merger.depth_write_enable);
    pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable);
}

/// Syncs the full stencil test state. Stencil testing is only meaningful with
/// a D24S8 depth buffer, so it is disabled for any other depth format.
void RasterizerVulkan::SyncStencilTest() {
    const auto& stencil_test = regs.framebuffer.output_merger.stencil_test;
    const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format ==
                                                        Pica::FramebufferRegs::DepthFormat::D24S8;

    pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable);
    pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail);
    pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass);
    pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail);
    pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func);
    pipeline_info.dynamic.stencil_reference = stencil_test.reference_value;
    pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask;
}

/// Syncs the depth test. The test must also be enabled when only depth writes
/// are requested (with compare op Always) so that the write actually happens.
void RasterizerVulkan::SyncDepthTest() {
    const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 ||
                              regs.framebuffer.output_merger.depth_write_enable == 1;
    const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1
                                ? regs.framebuffer.output_merger.depth_test_func.Value()
                                : Pica::FramebufferRegs::CompareFunc::Always;

    pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled);
    pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op);
}

/// Uploads dirty lighting and fog LUTs into the light/fog texel buffer and
/// records their element offsets (in Vec2f units) in the fragment uniform
/// block. Only LUTs whose data actually changed are re-uploaded, unless the
/// mapped buffer was invalidated.
void RasterizerVulkan::SyncAndUploadLUTsLF() {
    // Worst case: every lighting sampler LUT plus the fog LUT.
    constexpr std::size_t max_size =
        sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
        sizeof(Common::Vec2f) * 128; // fog

    if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) {
        return;
    }

    std::size_t bytes_used = 0;
    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));

    // Sync the lighting luts
    if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) {
        for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) {
            if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) {
                // Each LUT entry is packed as (value, delta) float pairs.
                std::array<Common::Vec2f, 256> new_data;
                const auto& source_lut = pica.lighting.luts[index];
                std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
                               [](const auto& entry) {
                                   return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
                               });

                // Skip the copy when the converted data is unchanged, unless the
                // underlying buffer contents were invalidated.
                if (new_data != lighting_lut_data[index] || invalidate) {
                    lighting_lut_data[index] = new_data;
                    std::memcpy(buffer + bytes_used, new_data.data(),
                                new_data.size() * sizeof(Common::Vec2f));
                    // Offsets are stored packed four-per-ivec4 in the uniform block.
                    fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
                        static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
                    fs_uniform_block_data.dirty = true;
                    bytes_used += new_data.size() * sizeof(Common::Vec2f);
                }
                fs_uniform_block_data.lighting_lut_dirty[index] = false;
            }
        }
        fs_uniform_block_data.lighting_lut_dirty_any = false;
    }

    // Sync the fog lut
    if (fs_uniform_block_data.fog_lut_dirty || invalidate) {
        std::array<Common::Vec2f, 128> new_data;

        std::transform(pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(),
                       [](const auto& entry) {
                           return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
                       });

        if (new_data != fog_lut_data || invalidate) {
            fog_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec2f));
            fs_uniform_block_data.data.fog_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec2f);
        }
        fs_uniform_block_data.fog_lut_dirty = false;
    }

    texture_lf_buffer.Commit(static_cast<u32>(bytes_used));
}

/// Uploads dirty procedural-texture LUTs (noise, color map, alpha map, color
/// table and color difference table) into the texel buffer and records their
/// element offsets in the fragment uniform block.
void RasterizerVulkan::SyncAndUploadLUTs() {
    const auto& proctex = pica.proctex;
    // Worst case upload size across all five proctex LUTs.
    constexpr std::size_t max_size =
        sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha
        sizeof(Common::Vec4f) * 256 +     // proctex
        sizeof(Common::Vec4f) * 256;      // proctex diff

    if (!fs_uniform_block_data.proctex_noise_lut_dirty &&
        !fs_uniform_block_data.proctex_color_map_dirty &&
        !fs_uniform_block_data.proctex_alpha_map_dirty &&
        !fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) {
        return;
    }

    std::size_t bytes_used = 0;
    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));

    // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
    auto sync_proctex_value_lut =
        [this, buffer = buffer, offset = offset, invalidate = invalidate,
         &bytes_used](const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& lut,
                      std::array<Common::Vec2f, 128>& lut_data, int& lut_offset) {
            // Convert entries to (value, delta) pairs; only upload on change.
            std::array<Common::Vec2f, 128> new_data;
            std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
                return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
            });

            if (new_data != lut_data || invalidate) {
                lut_data = new_data;
                std::memcpy(buffer + bytes_used, new_data.data(),
                            new_data.size() * sizeof(Common::Vec2f));
                lut_offset = static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
                fs_uniform_block_data.dirty = true;
                bytes_used += new_data.size() * sizeof(Common::Vec2f);
            }
        };

    // Sync the proctex noise lut
    if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) {
        sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data,
                               fs_uniform_block_data.data.proctex_noise_lut_offset);
        fs_uniform_block_data.proctex_noise_lut_dirty = false;
    }

    // Sync the proctex color map
    if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) {
        sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data,
                               fs_uniform_block_data.data.proctex_color_map_offset);
        fs_uniform_block_data.proctex_color_map_dirty = false;
    }

    // Sync the proctex alpha map
    if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) {
        sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data,
                               fs_uniform_block_data.data.proctex_alpha_map_offset);
        fs_uniform_block_data.proctex_alpha_map_dirty = false;
    }

    // Sync the proctex lut
    if (fs_uniform_block_data.proctex_lut_dirty || invalidate) {
        std::array<Common::Vec4f, 256> new_data;

        // Color table entries are 8-bit RGBA, normalized to [0, 1] floats here.
        std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(),
                       [](const auto& entry) {
                           auto rgba = entry.ToVector() / 255.0f;
                           return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
                       });

        if (new_data != proctex_lut_data || invalidate) {
            proctex_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec4f));
            fs_uniform_block_data.data.proctex_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec4f);
        }
        fs_uniform_block_data.proctex_lut_dirty = false;
    }

    // Sync the proctex difference lut
    if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) {
        std::array<Common::Vec4f, 256> new_data;

        std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(),
                       new_data.begin(), [](const auto& entry) {
                           auto rgba = entry.ToVector() / 255.0f;
                           return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
                       });

        if (new_data != proctex_diff_lut_data || invalidate) {
            proctex_diff_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec4f));
            fs_uniform_block_data.data.proctex_diff_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec4f);
        }
        fs_uniform_block_data.proctex_diff_lut_dirty = false;
    }

    texture_buffer.Commit(static_cast<u32>(bytes_used));
}

/// Uploads dirty uniform blocks (VS pica uniforms, VS uniforms, FS uniforms)
/// into a single mapped region of the stream uniform buffer and publishes each
/// block's buffer offset to the pipeline cache (binding indices 0, 1 and 2
/// respectively). VS pica uniforms are only re-uploaded for accelerated draws.
void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
    const bool sync_vs_pica = accelerate_draw;
    const bool sync_vs = vs_uniform_block_data.dirty;
    const bool sync_fs = fs_uniform_block_data.dirty;
    if (!sync_vs_pica && !sync_vs && !sync_fs) {
        return;
    }

    // Map enough space for all three blocks at their aligned sizes.
    const u64 uniform_size =
        uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
    auto [uniforms, offset, invalidate] =
        uniform_buffer.Map(uniform_size, uniform_buffer_alignment);

    u32 used_bytes = 0;

    if (sync_vs || invalidate) {
        std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
                    sizeof(vs_uniform_block_data.data));

        pipeline_cache.SetBufferOffset(1, offset + used_bytes);
        vs_uniform_block_data.dirty = false;
        used_bytes += static_cast<u32>(uniform_size_aligned_vs);
    }

    if (sync_fs || invalidate) {
        std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
                    sizeof(fs_uniform_block_data.data));

        pipeline_cache.SetBufferOffset(2, offset + used_bytes);
        fs_uniform_block_data.dirty = false;
        used_bytes += static_cast<u32>(uniform_size_aligned_fs);
    }

    if (sync_vs_pica) {
        // Built fresh from the current VS registers/setup on every accelerated draw.
        VSPicaUniformData vs_uniforms;
        vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup);
        std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));

        pipeline_cache.SetBufferOffset(0, offset + used_bytes);
        used_bytes += static_cast<u32>(uniform_size_aligned_vs_pica);
    }

    uniform_buffer.Commit(used_bytes);
}

} // namespace Vulkan