// src/video_core/renderer_vulkan/vk_rasterizer.cpp
   1  // Copyright 2023 Citra Emulator Project
   2  // Licensed under GPLv2 or any later version
   3  // Refer to the license.txt file included.
   4  
   5  #include "common/alignment.h"
   6  #include "common/literals.h"
   7  #include "common/logging/log.h"
   8  #include "common/math_util.h"
   9  #include "common/microprofile.h"
  10  #include "common/settings.h"
  11  #include "core/memory.h"
  12  #include "video_core/pica/pica_core.h"
  13  #include "video_core/renderer_vulkan/renderer_vulkan.h"
  14  #include "video_core/renderer_vulkan/vk_instance.h"
  15  #include "video_core/renderer_vulkan/vk_rasterizer.h"
  16  #include "video_core/renderer_vulkan/vk_scheduler.h"
  17  #include "video_core/texture/texture_decode.h"
  18  
  19  namespace Vulkan {
  20  
namespace {

// Microprofile timing scopes for the major rasterizer stages.
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));

using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
using VideoCore::SurfaceType;

using namespace Common::Literals;
using namespace Pica::Shader::Generator;

// Sizes of the persistently used upload buffers created by the rasterizer.
constexpr u64 STREAM_BUFFER_SIZE = 64_MiB;
constexpr u64 UNIFORM_BUFFER_SIZE = 4_MiB;
constexpr u64 TEXTURE_BUFFER_SIZE = 2_MiB;

// The stream buffer serves both vertex and index uploads.
constexpr vk::BufferUsageFlags BUFFER_USAGE =
    vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer;

// Plain-value draw state captured by copy into the command recording lambda,
// so recording can happen after the rasterizer state has moved on.
struct DrawParams {
    u32 vertex_count;
    s32 vertex_offset;
    u32 binding_count;
    std::array<u32, 16> bindings; // Byte offsets into the stream buffer per binding
    bool is_indexed;
};

// Returns the texel buffer size, clamped so the element count stays within
// the device limit for the smallest texel view format (8 bytes per texel).
[[nodiscard]] u64 TextureBufferSize(const Instance& instance) {
    // Use the smallest texel size from the texel views
    // which corresponds to eR32G32Sfloat
    const u64 max_size = instance.MaxTexelBufferElements() * 8;
    return std::min(max_size, TEXTURE_BUFFER_SIZE);
}

} // Anonymous namespace
  56  
// Creates the Vulkan rasterizer: wires up the caches and upload buffers,
// creates the texel buffer views used for LUT access, and pre-binds every
// descriptor slot with a valid (null) resource before syncing initial state.
RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica,
                                   VideoCore::CustomTexManager& custom_tex_manager,
                                   VideoCore::RendererBase& renderer,
                                   Frontend::EmuWindow& emu_window, const Instance& instance,
                                   Scheduler& scheduler, DescriptorPool& pool,
                                   RenderpassCache& renderpass_cache, u32 image_count)
    : RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler},
      renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache,
                                                         pool},
      runtime{instance,   scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(),
              image_count},
      res_cache{memory, custom_tex_manager, runtime, regs, renderer},
      stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
      uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer,
                     UNIFORM_BUFFER_SIZE},
      texture_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
                     TextureBufferSize(instance)},
      texture_lf_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
                        TextureBufferSize(instance)},
      async_shaders{Settings::values.async_shader_compilation.GetValue()} {

    // All vertex bindings point into the single shared stream buffer.
    vertex_buffers.fill(stream_buffer.Handle());

    // Pre-compute aligned uniform block sizes using the device's minimum
    // uniform buffer offset alignment.
    uniform_buffer_alignment = instance.UniformMinAlignment();
    uniform_size_aligned_vs_pica =
        Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment);
    uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment);
    uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment);

    // Define vertex layout for software shaders
    MakeSoftwareVertexLayout();
    pipeline_info.vertex_layout = software_layout;

    // Buffer views used by shaders to sample the LUT texel buffers.
    const vk::Device device = instance.GetDevice();
    texture_lf_view = device.createBufferViewUnique({
        .buffer = texture_lf_buffer.Handle(),
        .format = vk::Format::eR32G32Sfloat,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    });
    texture_rg_view = device.createBufferViewUnique({
        .buffer = texture_buffer.Handle(),
        .format = vk::Format::eR32G32Sfloat,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    });
    texture_rgba_view = device.createBufferViewUnique({
        .buffer = texture_buffer.Handle(),
        .format = vk::Format::eR32G32B32A32Sfloat,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    });

    // Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
    // all descriptor sets even the ones we don't use.
    pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData));
    pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData));
    pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData));
    pipeline_cache.BindTexelBuffer(3, *texture_lf_view);
    pipeline_cache.BindTexelBuffer(4, *texture_rg_view);
    pipeline_cache.BindTexelBuffer(5, *texture_rgba_view);

    // Bind null resources to every texture and storage image slot so the
    // descriptor sets are always fully valid.
    Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
    Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
    for (u32 i = 0; i < 3; i++) {
        pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle());
    }

    for (u32 i = 0; i < 7; i++) {
        pipeline_cache.BindStorageImage(i, null_surface.StorageView());
    }

    SyncEntireState();
}
 131  
 132  RasterizerVulkan::~RasterizerVulkan() = default;
 133  
// Performs once-per-frame maintenance by ticking the rasterizer cache.
void RasterizerVulkan::TickFrame() {
    res_cache.TickFrame();
}
 137  
// Loads the pipeline cache previously serialized to disk.
// NOTE(review): stop_loading and callback are unused here — presumably
// cancellation/progress are handled inside LoadDiskCache or elsewhere; confirm.
void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
                                         const VideoCore::DiskResourceLoadCallback& callback) {
    pipeline_cache.LoadDiskCache();
}
 142  
// Re-syncs every piece of fixed-function pipeline state from the Pica
// registers into pipeline_info.
void RasterizerVulkan::SyncFixedState() {
    SyncCullMode();
    SyncBlendEnabled();
    SyncBlendFuncs();
    SyncBlendColor();
    SyncLogicOp();
    SyncStencilTest();
    SyncDepthTest();
    SyncColorWriteMask();
    SyncStencilWriteMask();
    SyncDepthWriteMask();
}
 155  
 156  void RasterizerVulkan::SetupVertexArray() {
 157      const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = vertex_info;
 158      auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);
 159  
 160      /**
 161       * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
 162       * how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base
 163       * address containing the vertex array data. The data for each attribute loader (i) can be found
 164       * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
 165       * as something analogous to Vulkan bindings. The user can store attributes in separate loaders
 166       * or interleave them in the same loader.
 167       **/
 168      const auto& vertex_attributes = regs.pipeline.vertex_attributes;
 169      const PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
 170      const u32 stride_alignment = instance.GetMinVertexStrideAlignment();
 171  
 172      VertexLayout& layout = pipeline_info.vertex_layout;
 173      layout.binding_count = 0;
 174      layout.attribute_count = 16;
 175      enable_attributes.fill(false);
 176  
 177      u32 buffer_offset = 0;
 178      for (const auto& loader : vertex_attributes.attribute_loaders) {
 179          if (loader.component_count == 0 || loader.byte_count == 0) {
 180              continue;
 181          }
 182  
 183          // Analyze the attribute loader by checking which attributes it provides
 184          u32 offset = 0;
 185          for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) {
 186              const u32 attribute_index = loader.GetComponent(comp);
 187              if (attribute_index >= 12) {
 188                  // Attribute ids 12, to 15 signify 4, 8, 12 and 16-byte paddings respectively.
 189                  offset = Common::AlignUp(offset, 4);
 190                  offset += (attribute_index - 11) * 4;
 191                  continue;
 192              }
 193  
 194              const u32 size = vertex_attributes.GetNumElements(attribute_index);
 195              if (size == 0) {
 196                  continue;
 197              }
 198  
 199              offset =
 200                  Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
 201  
 202              const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
 203              const auto format = vertex_attributes.GetFormat(attribute_index);
 204  
 205              VertexAttribute& attribute = layout.attributes[input_reg];
 206              attribute.binding.Assign(layout.binding_count);
 207              attribute.location.Assign(input_reg);
 208              attribute.offset.Assign(offset);
 209              attribute.type.Assign(format);
 210              attribute.size.Assign(size);
 211  
 212              enable_attributes[input_reg] = true;
 213              offset += vertex_attributes.GetStride(attribute_index);
 214          }
 215  
 216          const PAddr data_addr =
 217              base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
 218          const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
 219          u32 data_size = loader.byte_count * vertex_num;
 220          res_cache.FlushRegion(data_addr, data_size);
 221  
 222          const MemoryRef src_ref = memory.GetPhysicalRef(data_addr);
 223          if (src_ref.GetSize() < data_size) {
 224              LOG_ERROR(Render_Vulkan,
 225                        "Vertex buffer size {} exceeds available space {} at address {:#016X}",
 226                        data_size, src_ref.GetSize(), data_addr);
 227          }
 228  
 229          const u8* src_ptr = src_ref.GetPtr();
 230          u8* dst_ptr = array_ptr + buffer_offset;
 231  
 232          // Align stride up if required by Vulkan implementation.
 233          const u32 aligned_stride =
 234              Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment);
 235          if (aligned_stride == loader.byte_count) {
 236              std::memcpy(dst_ptr, src_ptr, data_size);
 237          } else {
 238              for (std::size_t vertex = 0; vertex < vertex_num; vertex++) {
 239                  std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count,
 240                              loader.byte_count);
 241              }
 242          }
 243  
 244          // Create the binding associated with this loader
 245          VertexBinding& binding = layout.bindings[layout.binding_count];
 246          binding.binding.Assign(layout.binding_count);
 247          binding.fixed.Assign(0);
 248          binding.stride.Assign(aligned_stride);
 249  
 250          // Keep track of the binding offsets so we can bind the vertex buffer later
 251          binding_offsets[layout.binding_count++] = static_cast<u32>(array_offset + buffer_offset);
 252          buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4);
 253      }
 254  
 255      stream_buffer.Commit(buffer_offset);
 256  
 257      // Assign the rest of the attributes to the last binding
 258      SetupFixedAttribs();
 259  }
 260  
// Uploads fixed/default attribute values into a final dedicated binding and
// points every attribute not fed by a loader at it. Attributes that are fully
// unused alias the default vec4(0, 0, 0, 1) stored at offset zero.
void RasterizerVulkan::SetupFixedAttribs() {
    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
    VertexLayout& layout = pipeline_info.vertex_layout;

    // Worst case: all 16 attributes are fixed, one vec4 each.
    auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0);
    binding_offsets[layout.binding_count] = static_cast<u32>(fixed_offset);

    // Reserve the last binding for fixed and default attributes
    // Place the default attrib at offset zero for easy access
    static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
    std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));

    // Find all fixed attributes and assign them to the last binding
    u32 offset = sizeof(Common::Vec4f);
    for (std::size_t i = 0; i < 16; i++) {
        if (vertex_attributes.IsDefaultAttribute(i)) {
            const u32 reg = regs.vs.GetRegisterForAttribute(i);
            // Skip registers already provided by an attribute loader.
            if (!enable_attributes[reg]) {
                const auto& attr = pica.input_default_attributes[i];
                const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
                                         attr.w.ToFloat32()};

                const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
                std::memcpy(fixed_ptr + offset, data.data(), data_size);

                VertexAttribute& attribute = layout.attributes[reg];
                attribute.binding.Assign(layout.binding_count);
                attribute.location.Assign(reg);
                attribute.offset.Assign(offset);
                attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
                attribute.size.Assign(4);

                offset += data_size;
                enable_attributes[reg] = true;
            }
        }
    }

    // Loop one more time to find unused attributes and assign them to the default one
    // If the attribute is just disabled, shove the default attribute to avoid
    // errors if the shader ever decides to use it.
    for (u32 i = 0; i < 16; i++) {
        if (!enable_attributes[i]) {
            VertexAttribute& attribute = layout.attributes[i];
            attribute.binding.Assign(layout.binding_count);
            attribute.location.Assign(i);
            attribute.offset.Assign(0);
            attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
            attribute.size.Assign(4);
        }
    }

    // Define the fixed+default binding
    VertexBinding& binding = layout.bindings[layout.binding_count];
    binding.binding.Assign(layout.binding_count++);
    binding.fixed.Assign(1);
    binding.stride.Assign(offset);

    // Commit only the bytes actually written.
    stream_buffer.Commit(offset);
}
 321  
// Compiles/binds the guest programmable vertex shader for the current vertex
// layout. Returns false when the shader cannot be used for hardware drawing.
bool RasterizerVulkan::SetupVertexShader() {
    MICROPROFILE_SCOPE(Vulkan_VS);
    return pipeline_cache.UseProgrammableVertexShader(regs, pica.vs_setup,
                                                      pipeline_info.vertex_layout);
}
 327  
 328  bool RasterizerVulkan::SetupGeometryShader() {
 329      MICROPROFILE_SCOPE(Vulkan_GS);
 330  
 331      if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
 332          LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader");
 333          return false;
 334      }
 335  
 336      // Enable the quaternion fix-up geometry-shader only if we are actually doing per-fragment
 337      // lighting and care about proper quaternions. Otherwise just use standard vertex+fragment
 338      // shaders. We also don't need a geometry shader if the barycentric extension is supported.
 339      if (regs.lighting.disable || instance.IsFragmentShaderBarycentricSupported()) {
 340          pipeline_cache.UseTrivialGeometryShader();
 341          return true;
 342      }
 343  
 344      return pipeline_cache.UseFixedGeometryShader(regs);
 345  }
 346  
// Attempts to draw the current batch on the GPU. Returns false when the draw
// cannot be accelerated, in which case the caller falls back to software
// vertex processing.
bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
    // Guest geometry shaders are only tolerated in point mode with the
    // shader-driven topology; anything else must go through the fallback.
    if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
        if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) {
            return false;
        }
        if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) {
            return false;
        }
    }

    pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
    if (regs.pipeline.triangle_topology == TriangleTopology::Fan &&
        !instance.IsTriangleFanSupported()) {
        LOG_DEBUG(Render_Vulkan,
                  "Skipping accelerated draw with unsupported triangle fan topology");
        return false;
    }

    // Vertex data setup might involve scheduler flushes so perform it
    // early to avoid invalidating our state in the middle of the draw.
    vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment());
    SetupVertexArray();

    if (!SetupVertexShader()) {
        return false;
    }
    if (!SetupGeometryShader()) {
        return false;
    }

    return Draw(true, is_indexed);
}
 379  
 380  bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
 381      if (is_indexed) {
 382          SetupIndexArray();
 383      }
 384  
 385      const bool wait_built = !async_shaders || regs.pipeline.num_vertices <= 6;
 386      if (!pipeline_cache.BindPipeline(pipeline_info, wait_built)) {
 387          return true;
 388      }
 389  
 390      const DrawParams params = {
 391          .vertex_count = regs.pipeline.num_vertices,
 392          .vertex_offset = -static_cast<s32>(vertex_info.vs_input_index_min),
 393          .binding_count = pipeline_info.vertex_layout.binding_count,
 394          .bindings = binding_offsets,
 395          .is_indexed = is_indexed,
 396      };
 397  
 398      scheduler.Record([this, params](vk::CommandBuffer cmdbuf) {
 399          std::array<vk::DeviceSize, 16> offsets;
 400          std::transform(params.bindings.begin(), params.bindings.end(), offsets.begin(),
 401                         [](u32 offset) { return static_cast<vk::DeviceSize>(offset); });
 402          cmdbuf.bindVertexBuffers(0, params.binding_count, vertex_buffers.data(), offsets.data());
 403          if (params.is_indexed) {
 404              cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0);
 405          } else {
 406              cmdbuf.draw(params.vertex_count, 1, 0, 0);
 407          }
 408      });
 409  
 410      return true;
 411  }
 412  
// Uploads the guest index buffer into the stream buffer and records the index
// buffer binding. 8-bit indices are widened to 16-bit when the device lacks
// VK_EXT_index_type_uint8 support.
void RasterizerVulkan::SetupIndexArray() {
    const bool index_u8 = regs.pipeline.index_array.format == 0;
    const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported();
    // 1 byte per index when u8 is natively supported, 2 bytes otherwise.
    const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2);
    const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16;

    const u8* index_data =
        memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
                                  regs.pipeline.index_array.offset);

    auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);

    if (index_u8 && !native_u8) {
        // Widen each u8 index to u16 while copying.
        u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
        for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
            index_ptr_u16[i] = index_data[i];
        }
    } else {
        std::memcpy(index_ptr, index_data, index_buffer_size);
    }

    stream_buffer.Commit(index_buffer_size);

    scheduler.Record(
        [this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer cmdbuf) {
            cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
        });
}
 441  
 442  void RasterizerVulkan::DrawTriangles() {
 443      if (vertex_batch.empty()) {
 444          return;
 445      }
 446  
 447      pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
 448      pipeline_info.vertex_layout = software_layout;
 449  
 450      pipeline_cache.UseTrivialVertexShader();
 451      pipeline_cache.UseTrivialGeometryShader();
 452  
 453      Draw(false, false);
 454  }
 455  
// Central draw routine shared by the accelerated and software paths.
// Resolves framebuffer surfaces, syncs textures/uniforms/LUTs, begins
// rendering, then either records the accelerated draw or uploads and draws
// the software vertex batch. Returns false when the accelerated draw failed.
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
    MICROPROFILE_SCOPE(Vulkan_Drawing);

    const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
    const bool has_stencil = regs.framebuffer.HasStencil();

    // Work out which attachments this draw can actually touch so unneeded
    // surfaces are not fetched from the cache.
    const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask;
    const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled();
    const bool using_color_fb =
        regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
    const bool using_depth_fb =
        !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
        (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
         (has_stencil && pipeline_info.depth_stencil.stencil_test_enable));

    const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
    const Framebuffer* framebuffer = fb_helper.Framebuffer();
    if (!framebuffer->Handle()) {
        // Nothing to render into; treat as a successful no-op.
        return true;
    }

    pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color);
    pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth);

    // Shadow rendering writes the color attachment through a storage image.
    if (shadow_rendering) {
        pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color));
    } else {
        Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
        pipeline_cache.BindStorageImage(6, null_surface.StorageView());
    }

    // Update scissor uniforms
    // NOTE(review): the y components are intentionally cross-bound here
    // (second element -> scissor_y2) — presumably Scissor() returns a
    // vertically flipped rectangle; confirm against its definition.
    const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
    if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 ||
        fs_uniform_block_data.data.scissor_x2 != scissor_x2 ||
        fs_uniform_block_data.data.scissor_y1 != scissor_y1 ||
        fs_uniform_block_data.data.scissor_y2 != scissor_y2) {

        fs_uniform_block_data.data.scissor_x1 = scissor_x1;
        fs_uniform_block_data.data.scissor_x2 = scissor_x2;
        fs_uniform_block_data.data.scissor_y1 = scissor_y1;
        fs_uniform_block_data.data.scissor_y2 = scissor_y2;
        fs_uniform_block_data.dirty = true;
    }

    // Sync and bind the texture surfaces
    SyncTextureUnits(framebuffer);

    // Sync and bind the shader
    if (shader_dirty) {
        pipeline_cache.UseFragmentShader(regs, user_config);
        shader_dirty = false;
    }

    // Sync the LUTs within the texture buffer
    SyncAndUploadLUTs();
    SyncAndUploadLUTsLF();
    UploadUniforms(accelerate);

    // Begin rendering
    const auto draw_rect = fb_helper.DrawRect();
    renderpass_cache.BeginRendering(framebuffer, draw_rect);

    // Configure viewport and scissor
    const auto viewport = fb_helper.Viewport();
    pipeline_info.dynamic.viewport = Common::Rectangle<s32>{
        viewport.x,
        viewport.y,
        viewport.x + viewport.width,
        viewport.y + viewport.height,
    };
    pipeline_info.dynamic.scissor = draw_rect;

    // Draw the vertex batch
    bool succeeded = true;
    if (accelerate) {
        succeeded = AccelerateDrawBatchInternal(is_indexed);
    } else {
        pipeline_cache.BindPipeline(pipeline_info, true);

        // Upload the software-processed vertices to the stream buffer.
        const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex);
        const u32 vertex_count = static_cast<u32>(vertex_batch.size());
        const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));

        std::memcpy(buffer, vertex_batch.data(), vertex_size);
        stream_buffer.Commit(vertex_size);

        scheduler.Record([this, offset = offset, vertex_count](vk::CommandBuffer cmdbuf) {
            cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
            cmdbuf.draw(vertex_count, 1, 0, 0);
        });
    }

    vertex_batch.clear();
    return succeeded;
}
 552  
// Binds the surface and sampler for each enabled Pica texture unit, handling
// the special texture-unit-0 modes (shadow 2D/cube and texture cube) and
// avoiding sampling directly from the bound color attachment.
void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
    using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;

    const auto pica_textures = regs.texturing.GetTextures();
    for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
        const auto& texture = pica_textures[texture_index];

        // If the texture unit is disabled bind a null surface to it
        if (!texture.enabled) {
            const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
            const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
            pipeline_cache.BindTexture(texture_index, null_surface.ImageView(),
                                       null_sampler.Handle());
            continue;
        }

        // Handle special tex0 configurations
        if (texture_index == 0) {
            switch (texture.config.type.Value()) {
            case TextureType::Shadow2D: {
                // Shadow maps are accessed as storage images, not sampled textures.
                Surface& surface = res_cache.GetTextureSurface(texture);
                surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
                pipeline_cache.BindStorageImage(0, surface.StorageView());
                continue;
            }
            case TextureType::ShadowCube: {
                BindShadowCube(texture);
                continue;
            }
            case TextureType::TextureCube: {
                BindTextureCube(texture);
                continue;
            }
            default:
                // Plain 2D texture: clear any leftover special bindings first.
                UnbindSpecial();
                break;
            }
        }

        // Bind the texture provided by the rasterizer cache
        Surface& surface = res_cache.GetTextureSurface(texture);
        Sampler& sampler = res_cache.GetSampler(texture.config);
        // IsFeedbackLoop binds a copy of the framebuffer when the texture is
        // also the current color attachment.
        if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) {
            pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle());
        }
    }
}
 600  
 601  void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
 602      using CubeFace = Pica::TexturingRegs::CubeFace;
 603      auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
 604      constexpr std::array faces = {
 605          CubeFace::PositiveX, CubeFace::NegativeX, CubeFace::PositiveY,
 606          CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ,
 607      };
 608  
 609      for (CubeFace face : faces) {
 610          const u32 binding = static_cast<u32>(face);
 611          info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
 612  
 613          const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info);
 614          Surface& surface = res_cache.GetSurface(surface_id);
 615          surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
 616          pipeline_cache.BindStorageImage(binding, surface.StorageView());
 617      }
 618  }
 619  
 620  void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
 621      using CubeFace = Pica::TexturingRegs::CubeFace;
 622      const VideoCore::TextureCubeConfig config = {
 623          .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),
 624          .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX),
 625          .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY),
 626          .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY),
 627          .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ),
 628          .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ),
 629          .width = texture.config.width,
 630          .levels = texture.config.lod.max_level + 1,
 631          .format = texture.format,
 632      };
 633  
 634      Surface& surface = res_cache.GetTextureCube(config);
 635      Sampler& sampler = res_cache.GetSampler(texture.config);
 636      pipeline_cache.BindTexture(0, surface.ImageView(), sampler.Handle());
 637  }
 638  
 639  bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer,
 640                                        Surface& surface, Sampler& sampler) {
 641      const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color);
 642      const bool is_feedback_loop = color_view == surface.ImageView();
 643      if (!is_feedback_loop) {
 644          return false;
 645      }
 646  
 647      // Make a temporary copy of the framebuffer to sample from
 648      pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle());
 649      return true;
 650  }
 651  
 652  void RasterizerVulkan::UnbindSpecial() {
 653      Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
 654      for (u32 i = 0; i < 6; i++) {
 655          pipeline_cache.BindStorageImage(i, null_surface.StorageView());
 656      }
 657  }
 658  
// Dispatches a fixed-function Pica register write to the relevant sync
// routine(s). Several Pica registers pack multiple pieces of pipeline state,
// so one write may trigger more than one sync.
void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) {
    switch (id) {
    // Culling
    case PICA_REG_INDEX(rasterizer.cull_mode):
        SyncCullMode();
        break;

    // Blending
    case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
        SyncBlendEnabled();
        // Update since logic op emulation depends on alpha blend enable.
        SyncLogicOp();
        SyncColorWriteMask();
        break;
    case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
        SyncBlendFuncs();
        break;
    case PICA_REG_INDEX(framebuffer.output_merger.blend_const):
        SyncBlendColor();
        break;

    // Sync VK stencil test + stencil write mask
    // (Pica stencil test function register also contains a stencil write mask)
    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func):
        SyncStencilTest();
        SyncStencilWriteMask();
        break;
    case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op):
    case PICA_REG_INDEX(framebuffer.framebuffer.depth_format):
        SyncStencilTest();
        break;

    // Sync VK depth test + depth and color write mask
    // (Pica depth test function register also contains a depth and color write mask)
    case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable):
        SyncDepthTest();
        SyncDepthWriteMask();
        SyncColorWriteMask();
        break;

    // Sync VK depth and stencil write mask
    // (This is a dedicated combined depth / stencil write-enable register)
    case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write):
        SyncDepthWriteMask();
        SyncStencilWriteMask();
        break;

    // Sync VK color write mask
    // (This is a dedicated color write-enable register)
    case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write):
        SyncColorWriteMask();
        break;

    // Logic op
    case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
        SyncLogicOp();
        // Update since color write mask is used to emulate no-op.
        SyncColorWriteMask();
        break;
    }
}
 720  
// Flushes every cached surface back to guest memory.
void RasterizerVulkan::FlushAll() {
    res_cache.FlushAll();
}
 724  
// Flushes cached surfaces overlapping [addr, addr + size) back to guest memory.
void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) {
    res_cache.FlushRegion(addr, size);
}
 728  
// Invalidates cached surfaces overlapping [addr, addr + size); cached data in
// that range is discarded rather than written back.
void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) {
    res_cache.InvalidateRegion(addr, size);
}
 732  
// Flushes cached surfaces in the region back to guest memory, then invalidates
// them so subsequent accesses re-read guest memory.
void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) {
    res_cache.FlushRegion(addr, size);
    res_cache.InvalidateRegion(addr, size);
}
 737  
// Clears the entire surface cache, optionally flushing contents first.
void RasterizerVulkan::ClearAll(bool flush) {
    res_cache.ClearAll(flush);
}
 741  
// Delegates a Pica display transfer to the surface cache; the returned bool
// propagates the cache's result (whether the transfer was handled).
bool RasterizerVulkan::AccelerateDisplayTransfer(const Pica::DisplayTransferConfig& config) {
    return res_cache.AccelerateDisplayTransfer(config);
}
 745  
// Delegates a Pica texture copy to the surface cache; the returned bool
// propagates the cache's result (whether the copy was handled).
bool RasterizerVulkan::AccelerateTextureCopy(const Pica::DisplayTransferConfig& config) {
    return res_cache.AccelerateTextureCopy(config);
}
 749  
// Delegates a Pica memory fill to the surface cache; the returned bool
// propagates the cache's result (whether the fill was handled).
bool RasterizerVulkan::AccelerateFill(const Pica::MemoryFillConfig& config) {
    return res_cache.AccelerateFill(config);
}
 753  
 754  bool RasterizerVulkan::AccelerateDisplay(const Pica::FramebufferConfig& config,
 755                                           PAddr framebuffer_addr, u32 pixel_stride,
 756                                           ScreenInfo& screen_info) {
 757      if (framebuffer_addr == 0) [[unlikely]] {
 758          return false;
 759      }
 760  
 761      VideoCore::SurfaceParams src_params;
 762      src_params.addr = framebuffer_addr;
 763      src_params.width = std::min(config.width.Value(), pixel_stride);
 764      src_params.height = config.height;
 765      src_params.stride = pixel_stride;
 766      src_params.is_tiled = false;
 767      src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format);
 768      src_params.UpdateParams();
 769  
 770      const auto [src_surface_id, src_rect] =
 771          res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true);
 772  
 773      if (!src_surface_id) {
 774          return false;
 775      }
 776  
 777      const Surface& src_surface = res_cache.GetSurface(src_surface_id);
 778      const u32 scaled_width = src_surface.GetScaledWidth();
 779      const u32 scaled_height = src_surface.GetScaledHeight();
 780  
 781      screen_info.texcoords = Common::Rectangle<f32>(
 782          (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
 783          (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
 784  
 785      screen_info.image_view = src_surface.ImageView();
 786  
 787      return true;
 788  }
 789  
 790  void RasterizerVulkan::MakeSoftwareVertexLayout() {
 791      constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
 792  
 793      software_layout = VertexLayout{
 794          .binding_count = 1,
 795          .attribute_count = 8,
 796      };
 797  
 798      for (u32 i = 0; i < software_layout.binding_count; i++) {
 799          VertexBinding& binding = software_layout.bindings[i];
 800          binding.binding.Assign(i);
 801          binding.fixed.Assign(0);
 802          binding.stride.Assign(sizeof(HardwareVertex));
 803      }
 804  
 805      u32 offset = 0;
 806      for (u32 i = 0; i < 8; i++) {
 807          VertexAttribute& attribute = software_layout.attributes[i];
 808          attribute.binding.Assign(0);
 809          attribute.location.Assign(i);
 810          attribute.offset.Assign(offset);
 811          attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
 812          attribute.size.Assign(sizes[i]);
 813          offset += sizes[i] * sizeof(float);
 814      }
 815  }
 816  
// Mirrors the Pica cull mode register into the pipeline key.
void RasterizerVulkan::SyncCullMode() {
    pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
}
 820  
// Mirrors the Pica alpha-blend enable register into the pipeline key.
void RasterizerVulkan::SyncBlendEnabled() {
    pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable;
}
 824  
 825  void RasterizerVulkan::SyncBlendFuncs() {
 826      pipeline_info.blending.color_blend_eq.Assign(
 827          regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
 828      pipeline_info.blending.alpha_blend_eq.Assign(
 829          regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
 830      pipeline_info.blending.src_color_blend_factor.Assign(
 831          regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
 832      pipeline_info.blending.dst_color_blend_factor.Assign(
 833          regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
 834      pipeline_info.blending.src_alpha_blend_factor.Assign(
 835          regs.framebuffer.output_merger.alpha_blending.factor_source_a);
 836      pipeline_info.blending.dst_alpha_blend_factor.Assign(
 837          regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
 838  }
 839  
// Mirrors the Pica blend constant color into the dynamic pipeline state.
void RasterizerVulkan::SyncBlendColor() {
    pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw;
}
 843  
 844  void RasterizerVulkan::SyncLogicOp() {
 845      if (instance.NeedsLogicOpEmulation()) {
 846          // We need this in the fragment shader to emulate logic operations
 847          shader_dirty = true;
 848      }
 849  
 850      pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op;
 851  
 852      const bool is_logic_op_emulated =
 853          instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
 854      const bool is_logic_op_noop =
 855          regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
 856      if (is_logic_op_emulated && is_logic_op_noop) {
 857          // Color output is disabled by logic operation. We use color write mask to skip
 858          // color but allow depth write.
 859          pipeline_info.blending.color_write_mask = 0;
 860      }
 861  }
 862  
 863  void RasterizerVulkan::SyncColorWriteMask() {
 864      const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0
 865                                 ? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF
 866                                 : 0;
 867  
 868      const bool is_logic_op_emulated =
 869          instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
 870      const bool is_logic_op_noop =
 871          regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
 872      if (is_logic_op_emulated && is_logic_op_noop) {
 873          // Color output is disabled by logic operation. We use color write mask to skip
 874          // color but allow depth write. Return early to avoid overwriting this.
 875          return;
 876      }
 877  
 878      pipeline_info.blending.color_write_mask = color_mask;
 879  }
 880  
 881  void RasterizerVulkan::SyncStencilWriteMask() {
 882      pipeline_info.dynamic.stencil_write_mask =
 883          (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
 884              ? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask)
 885              : 0;
 886  }
 887  
 888  void RasterizerVulkan::SyncDepthWriteMask() {
 889      const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
 890                                 regs.framebuffer.output_merger.depth_write_enable);
 891      pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable);
 892  }
 893  
 894  void RasterizerVulkan::SyncStencilTest() {
 895      const auto& stencil_test = regs.framebuffer.output_merger.stencil_test;
 896      const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format ==
 897                                                          Pica::FramebufferRegs::DepthFormat::D24S8;
 898  
 899      pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable);
 900      pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail);
 901      pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass);
 902      pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail);
 903      pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func);
 904      pipeline_info.dynamic.stencil_reference = stencil_test.reference_value;
 905      pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask;
 906  }
 907  
 908  void RasterizerVulkan::SyncDepthTest() {
 909      const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 ||
 910                                regs.framebuffer.output_merger.depth_write_enable == 1;
 911      const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1
 912                                  ? regs.framebuffer.output_merger.depth_test_func.Value()
 913                                  : Pica::FramebufferRegs::CompareFunc::Always;
 914  
 915      pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled);
 916      pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op);
 917  }
 918  
// Uploads dirty lighting and fog lookup tables into the texel buffer dedicated
// to lighting/fog (LF) data, and records each table's texel offset in the
// fragment uniform block. Uploads are skipped when the converted data matches
// the cached copy, unless the buffer map reported 'invalidate' (previously
// written contents are no longer valid and everything must be re-uploaded).
void RasterizerVulkan::SyncAndUploadLUTsLF() {
    // Worst case: all lighting LUTs plus the fog LUT re-uploaded in one map.
    constexpr std::size_t max_size =
        sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
        sizeof(Common::Vec2f) * 128; // fog

    if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) {
        return;
    }

    std::size_t bytes_used = 0;
    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));

    // Sync the lighting luts
    if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) {
        for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) {
            if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) {
                // Convert each entry to a (value, difference) float pair.
                std::array<Common::Vec2f, 256> new_data;
                const auto& source_lut = pica.lighting.luts[index];
                std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
                               [](const auto& entry) {
                                   return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
                               });

                // Upload only when the data changed (or offsets are stale).
                if (new_data != lighting_lut_data[index] || invalidate) {
                    lighting_lut_data[index] = new_data;
                    std::memcpy(buffer + bytes_used, new_data.data(),
                                new_data.size() * sizeof(Common::Vec2f));
                    // Publish the LUT's texel offset (packed 4 per ivec4 slot).
                    fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
                        static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
                    fs_uniform_block_data.dirty = true;
                    bytes_used += new_data.size() * sizeof(Common::Vec2f);
                }
                fs_uniform_block_data.lighting_lut_dirty[index] = false;
            }
        }
        fs_uniform_block_data.lighting_lut_dirty_any = false;
    }

    // Sync the fog lut
    if (fs_uniform_block_data.fog_lut_dirty || invalidate) {
        std::array<Common::Vec2f, 128> new_data;

        std::transform(pica.fog.lut.begin(), pica.fog.lut.end(), new_data.begin(),
                       [](const auto& entry) {
                           return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
                       });

        if (new_data != fog_lut_data || invalidate) {
            fog_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec2f));
            fs_uniform_block_data.data.fog_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec2f);
        }
        fs_uniform_block_data.fog_lut_dirty = false;
    }

    texture_lf_buffer.Commit(static_cast<u32>(bytes_used));
}
 980  
// Uploads dirty procedural-texture lookup tables (noise, color map, alpha map,
// color LUT, and color-difference LUT) into the texel buffer and records each
// table's texel offset in the fragment uniform block. As in SyncAndUploadLUTsLF,
// data is re-uploaded when changed or when the buffer map reports 'invalidate'.
void RasterizerVulkan::SyncAndUploadLUTs() {
    const auto& proctex = pica.proctex;
    // Worst case: all five proctex tables uploaded in one map.
    constexpr std::size_t max_size =
        sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha
        sizeof(Common::Vec4f) * 256 +     // proctex
        sizeof(Common::Vec4f) * 256;      // proctex diff

    if (!fs_uniform_block_data.proctex_noise_lut_dirty &&
        !fs_uniform_block_data.proctex_color_map_dirty &&
        !fs_uniform_block_data.proctex_alpha_map_dirty &&
        !fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) {
        return;
    }

    std::size_t bytes_used = 0;
    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));

    // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
    // Converts a 128-entry value table to (value, difference) float pairs,
    // uploads it if it changed, and advances the shared bytes_used cursor.
    auto sync_proctex_value_lut =
        [this, buffer = buffer, offset = offset, invalidate = invalidate,
         &bytes_used](const std::array<Pica::PicaCore::ProcTex::ValueEntry, 128>& lut,
                      std::array<Common::Vec2f, 128>& lut_data, int& lut_offset) {
            std::array<Common::Vec2f, 128> new_data;
            std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
                return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
            });

            if (new_data != lut_data || invalidate) {
                lut_data = new_data;
                std::memcpy(buffer + bytes_used, new_data.data(),
                            new_data.size() * sizeof(Common::Vec2f));
                lut_offset = static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
                fs_uniform_block_data.dirty = true;
                bytes_used += new_data.size() * sizeof(Common::Vec2f);
            }
        };

    // Sync the proctex noise lut
    if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) {
        sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data,
                               fs_uniform_block_data.data.proctex_noise_lut_offset);
        fs_uniform_block_data.proctex_noise_lut_dirty = false;
    }

    // Sync the proctex color map
    if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) {
        sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data,
                               fs_uniform_block_data.data.proctex_color_map_offset);
        fs_uniform_block_data.proctex_color_map_dirty = false;
    }

    // Sync the proctex alpha map
    if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) {
        sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data,
                               fs_uniform_block_data.data.proctex_alpha_map_offset);
        fs_uniform_block_data.proctex_alpha_map_dirty = false;
    }

    // Sync the proctex lut
    if (fs_uniform_block_data.proctex_lut_dirty || invalidate) {
        // Color entries are normalized from 0-255 bytes to 0.0-1.0 floats.
        std::array<Common::Vec4f, 256> new_data;

        std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(),
                       [](const auto& entry) {
                           auto rgba = entry.ToVector() / 255.0f;
                           return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
                       });

        if (new_data != proctex_lut_data || invalidate) {
            proctex_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec4f));
            fs_uniform_block_data.data.proctex_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec4f);
        }
        fs_uniform_block_data.proctex_lut_dirty = false;
    }

    // Sync the proctex difference lut
    if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) {
        std::array<Common::Vec4f, 256> new_data;

        std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(),
                       new_data.begin(), [](const auto& entry) {
                           auto rgba = entry.ToVector() / 255.0f;
                           return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
                       });

        if (new_data != proctex_diff_lut_data || invalidate) {
            proctex_diff_lut_data = new_data;
            std::memcpy(buffer + bytes_used, new_data.data(),
                        new_data.size() * sizeof(Common::Vec4f));
            fs_uniform_block_data.data.proctex_diff_lut_offset =
                static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
            fs_uniform_block_data.dirty = true;
            bytes_used += new_data.size() * sizeof(Common::Vec4f);
        }
        fs_uniform_block_data.proctex_diff_lut_dirty = false;
    }

    texture_buffer.Commit(static_cast<u32>(bytes_used));
}
1085  
// Uploads any dirty uniform blocks into the shared uniform stream buffer and
// records each block's buffer offset with the pipeline cache. Buffer bindings:
// 0 = VS Pica register uniforms, 1 = VS uniforms, 2 = FS uniforms. The Pica VS
// uniforms are refreshed on every accelerated (hardware-vertex-shader) draw.
void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
    const bool sync_vs_pica = accelerate_draw;
    const bool sync_vs = vs_uniform_block_data.dirty;
    const bool sync_fs = fs_uniform_block_data.dirty;
    if (!sync_vs_pica && !sync_vs && !sync_fs) {
        return;
    }

    // Map enough space for all three blocks at their aligned sizes.
    const u64 uniform_size =
        uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
    auto [uniforms, offset, invalidate] =
        uniform_buffer.Map(uniform_size, uniform_buffer_alignment);

    u32 used_bytes = 0;

    // 'invalidate' forces a re-upload because previously recorded offsets no
    // longer point at valid data.
    if (sync_vs || invalidate) {
        std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
                    sizeof(vs_uniform_block_data.data));

        pipeline_cache.SetBufferOffset(1, offset + used_bytes);
        vs_uniform_block_data.dirty = false;
        used_bytes += static_cast<u32>(uniform_size_aligned_vs);
    }

    if (sync_fs || invalidate) {
        std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
                    sizeof(fs_uniform_block_data.data));

        pipeline_cache.SetBufferOffset(2, offset + used_bytes);
        fs_uniform_block_data.dirty = false;
        used_bytes += static_cast<u32>(uniform_size_aligned_fs);
    }

    if (sync_vs_pica) {
        // Built fresh from the current Pica VS registers and shader setup.
        VSPicaUniformData vs_uniforms;
        vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup);
        std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));

        pipeline_cache.SetBufferOffset(0, offset + used_bytes);
        used_bytes += static_cast<u32>(uniform_size_aligned_vs_pica);
    }

    uniform_buffer.Commit(used_bytes);
}
1130  
1131  } // namespace Vulkan