/ src / Ryujinx.Graphics.Vulkan / BarrierBatch.cs
BarrierBatch.cs
  1  using Silk.NET.Vulkan;
  2  using System;
  3  using System.Collections.Generic;
  4  using System.Runtime.CompilerServices;
  5  
  6  namespace Ryujinx.Graphics.Vulkan
  7  {
  8      internal class BarrierBatch : IDisposable
  9      {
 10          private const int MaxBarriersPerCall = 16;
 11  
 12          private const AccessFlags BaseAccess = AccessFlags.ShaderReadBit | AccessFlags.ShaderWriteBit;
 13          private const AccessFlags BufferAccess = AccessFlags.IndexReadBit | AccessFlags.VertexAttributeReadBit | AccessFlags.UniformReadBit;
 14          private const AccessFlags CommandBufferAccess = AccessFlags.IndirectCommandReadBit;
 15  
 16          private readonly VulkanRenderer _gd;
 17  
 18          private readonly NativeArray<MemoryBarrier> _memoryBarrierBatch = new(MaxBarriersPerCall);
 19          private readonly NativeArray<BufferMemoryBarrier> _bufferBarrierBatch = new(MaxBarriersPerCall);
 20          private readonly NativeArray<ImageMemoryBarrier> _imageBarrierBatch = new(MaxBarriersPerCall);
 21  
 22          private readonly List<BarrierWithStageFlags<MemoryBarrier, int>> _memoryBarriers = new();
 23          private readonly List<BarrierWithStageFlags<BufferMemoryBarrier, int>> _bufferBarriers = new();
 24          private readonly List<BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage>> _imageBarriers = new();
 25          private int _queuedBarrierCount;
 26  
 27          private enum IncoherentBarrierType
 28          {
 29              None,
 30              Texture,
 31              All,
 32              CommandBuffer
 33          }
 34  
 35          private bool _feedbackLoopActive;
 36          private PipelineStageFlags _incoherentBufferWriteStages;
 37          private PipelineStageFlags _incoherentTextureWriteStages;
 38          private PipelineStageFlags _extraStages;
 39          private IncoherentBarrierType _queuedIncoherentBarrier;
 40          private bool _queuedFeedbackLoopBarrier;
 41  
 42          public BarrierBatch(VulkanRenderer gd)
 43          {
 44              _gd = gd;
 45          }
 46  
 47          public static (AccessFlags Access, PipelineStageFlags Stages) GetSubpassAccessSuperset(VulkanRenderer gd)
 48          {
 49              AccessFlags access = BufferAccess;
 50              PipelineStageFlags stages = PipelineStageFlags.AllGraphicsBit;
 51  
 52              if (gd.TransformFeedbackApi != null)
 53              {
 54                  access |= AccessFlags.TransformFeedbackWriteBitExt;
 55                  stages |= PipelineStageFlags.TransformFeedbackBitExt;
 56              }
 57  
 58              return (access, stages);
 59          }
 60  
 61          private readonly record struct StageFlags : IEquatable<StageFlags>
 62          {
 63              public readonly PipelineStageFlags Source;
 64              public readonly PipelineStageFlags Dest;
 65  
 66              public StageFlags(PipelineStageFlags source, PipelineStageFlags dest)
 67              {
 68                  Source = source;
 69                  Dest = dest;
 70              }
 71          }
 72  
 73          private readonly struct BarrierWithStageFlags<T, T2> where T : unmanaged
 74          {
 75              public readonly StageFlags Flags;
 76              public readonly T Barrier;
 77              public readonly T2 Resource;
 78  
 79              public BarrierWithStageFlags(StageFlags flags, T barrier)
 80              {
 81                  Flags = flags;
 82                  Barrier = barrier;
 83                  Resource = default;
 84              }
 85  
 86              public BarrierWithStageFlags(PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags, T barrier, T2 resource)
 87              {
 88                  Flags = new StageFlags(srcStageFlags, dstStageFlags);
 89                  Barrier = barrier;
 90                  Resource = resource;
 91              }
 92          }
 93  
 94          private void QueueBarrier<T, T2>(List<BarrierWithStageFlags<T, T2>> list, T barrier, T2 resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) where T : unmanaged
 95          {
 96              list.Add(new BarrierWithStageFlags<T, T2>(srcStageFlags, dstStageFlags, barrier, resource));
 97              _queuedBarrierCount++;
 98          }
 99  
100          public void QueueBarrier(MemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
101          {
102              QueueBarrier(_memoryBarriers, barrier, default, srcStageFlags, dstStageFlags);
103          }
104  
105          public void QueueBarrier(BufferMemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
106          {
107              QueueBarrier(_bufferBarriers, barrier, default, srcStageFlags, dstStageFlags);
108          }
109  
110          public void QueueBarrier(ImageMemoryBarrier barrier, TextureStorage resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
111          {
112              QueueBarrier(_imageBarriers, barrier, resource, srcStageFlags, dstStageFlags);
113          }
114  
115          [MethodImpl(MethodImplOptions.AggressiveInlining)]
116          public unsafe void FlushMemoryBarrier(ShaderCollection program, bool inRenderPass)
117          {
118              if (_queuedIncoherentBarrier > IncoherentBarrierType.None)
119              {
120                  // We should emit a memory barrier if there's a write access in the program (current program, or program since last barrier)
121                  bool hasTextureWrite = _incoherentTextureWriteStages != PipelineStageFlags.None;
122                  bool hasBufferWrite = _incoherentBufferWriteStages != PipelineStageFlags.None;
123                  bool hasBufferBarrier = _queuedIncoherentBarrier > IncoherentBarrierType.Texture;
124  
125                  if (hasTextureWrite || (hasBufferBarrier && hasBufferWrite))
126                  {
127                      AccessFlags access = BaseAccess;
128  
129                      PipelineStageFlags stages = inRenderPass ? PipelineStageFlags.AllGraphicsBit : PipelineStageFlags.AllCommandsBit;
130  
131                      if (hasBufferBarrier && hasBufferWrite)
132                      {
133                          access |= BufferAccess;
134  
135                          if (_gd.TransformFeedbackApi != null)
136                          {
137                              access |= AccessFlags.TransformFeedbackWriteBitExt;
138                              stages |= PipelineStageFlags.TransformFeedbackBitExt;
139                          }
140                      }
141  
142                      if (_queuedIncoherentBarrier == IncoherentBarrierType.CommandBuffer)
143                      {
144                          access |= CommandBufferAccess;
145                          stages |= PipelineStageFlags.DrawIndirectBit;
146                      }
147  
148                      MemoryBarrier barrier = new MemoryBarrier()
149                      {
150                          SType = StructureType.MemoryBarrier,
151                          SrcAccessMask = access,
152                          DstAccessMask = access
153                      };
154  
155                      QueueBarrier(barrier, stages, stages);
156  
157                      _incoherentTextureWriteStages = program?.IncoherentTextureWriteStages ?? PipelineStageFlags.None;
158  
159                      if (_queuedIncoherentBarrier > IncoherentBarrierType.Texture)
160                      {
161                          if (program != null)
162                          {
163                              _incoherentBufferWriteStages = program.IncoherentBufferWriteStages | _extraStages;
164                          }
165                          else
166                          {
167                              _incoherentBufferWriteStages = PipelineStageFlags.None;
168                          }
169                      }
170  
171                      _queuedIncoherentBarrier = IncoherentBarrierType.None;
172                      _queuedFeedbackLoopBarrier = false;
173                  }
174                  else if (_feedbackLoopActive && _queuedFeedbackLoopBarrier)
175                  {
176                      // Feedback loop barrier.
177  
178                      MemoryBarrier barrier = new MemoryBarrier()
179                      {
180                          SType = StructureType.MemoryBarrier,
181                          SrcAccessMask = AccessFlags.ShaderWriteBit,
182                          DstAccessMask = AccessFlags.ShaderReadBit
183                      };
184  
185                      QueueBarrier(barrier, PipelineStageFlags.FragmentShaderBit, PipelineStageFlags.AllGraphicsBit);
186  
187                      _queuedFeedbackLoopBarrier = false;
188                  }
189  
190                  _feedbackLoopActive = false;
191              }
192          }
193  
194          public unsafe void Flush(CommandBufferScoped cbs, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
195          {
196              Flush(cbs, null, false, inRenderPass, rpHolder, endRenderPass);
197          }
198  
199          public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool feedbackLoopActive, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
200          {
201              if (program != null)
202              {
203                  _incoherentBufferWriteStages |= program.IncoherentBufferWriteStages | _extraStages;
204                  _incoherentTextureWriteStages |= program.IncoherentTextureWriteStages;
205              }
206  
207              _feedbackLoopActive |= feedbackLoopActive;
208  
209              FlushMemoryBarrier(program, inRenderPass);
210  
211              if (!inRenderPass && rpHolder != null)
212              {
213                  // Render pass is about to begin. Queue any fences that normally interrupt the pass.
214                  rpHolder.InsertForcedFences(cbs);
215              }
216  
217              while (_queuedBarrierCount > 0)
218              {
219                  int memoryCount = 0;
220                  int bufferCount = 0;
221                  int imageCount = 0;
222  
223                  bool hasBarrier = false;
224                  StageFlags flags = default;
225  
226                  static void AddBarriers<T, T2>(
227                      Span<T> target,
228                      ref int queuedBarrierCount,
229                      ref bool hasBarrier,
230                      ref StageFlags flags,
231                      ref int count,
232                      List<BarrierWithStageFlags<T, T2>> list) where T : unmanaged
233                  {
234                      int firstMatch = -1;
235                      int end = list.Count;
236  
237                      for (int i = 0; i < list.Count; i++)
238                      {
239                          BarrierWithStageFlags<T, T2> barrier = list[i];
240  
241                          if (!hasBarrier)
242                          {
243                              flags = barrier.Flags;
244                              hasBarrier = true;
245  
246                              target[count++] = barrier.Barrier;
247                              queuedBarrierCount--;
248                              firstMatch = i;
249  
250                              if (count >= target.Length)
251                              {
252                                  end = i + 1;
253                                  break;
254                              }
255                          }
256                          else
257                          {
258                              if (flags.Equals(barrier.Flags))
259                              {
260                                  target[count++] = barrier.Barrier;
261                                  queuedBarrierCount--;
262  
263                                  if (firstMatch == -1)
264                                  {
265                                      firstMatch = i;
266                                  }
267  
268                                  if (count >= target.Length)
269                                  {
270                                      end = i + 1;
271                                      break;
272                                  }
273                              }
274                              else
275                              {
276                                  // Delete consumed barriers from the first match to the current non-match.
277                                  if (firstMatch != -1)
278                                  {
279                                      int deleteCount = i - firstMatch;
280                                      list.RemoveRange(firstMatch, deleteCount);
281                                      i -= deleteCount;
282  
283                                      firstMatch = -1;
284                                      end = list.Count;
285                                  }
286                              }
287                          }
288                      }
289  
290                      if (firstMatch == 0 && end == list.Count)
291                      {
292                          list.Clear();
293                      }
294                      else if (firstMatch != -1)
295                      {
296                          int deleteCount = end - firstMatch;
297  
298                          list.RemoveRange(firstMatch, deleteCount);
299                      }
300                  }
301  
302                  if (inRenderPass && _imageBarriers.Count > 0)
303                  {
304                      // Image barriers queued in the batch are meant to be globally scoped,
305                      // but inside a render pass they're scoped to just the range of the render pass.
306  
307                      // On MoltenVK, we just break the rules and always use image barrier.
308                      // On desktop GPUs, all barriers are globally scoped, so we just replace it with a generic memory barrier.
309                      // Generally, we want to avoid this from happening in the future, so flag the texture to immediately
310                      // emit a barrier whenever the current render pass is bound again.
311  
312                      bool anyIsNonAttachment = false;
313  
314                      foreach (BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage> barrier in _imageBarriers)
315                      {
316                          // If the binding is an attachment, don't add it as a forced fence.
317                          bool isAttachment = rpHolder.ContainsAttachment(barrier.Resource);
318  
319                          if (!isAttachment)
320                          {
321                              rpHolder.AddForcedFence(barrier.Resource, barrier.Flags.Dest);
322                              anyIsNonAttachment = true;
323                          }
324                      }
325  
326                      if (_gd.IsTBDR)
327                      {
328                          if (!_gd.IsMoltenVk)
329                          {
330                              if (!anyIsNonAttachment)
331                              {
332                                  // This case is a feedback loop. To prevent this from causing an absolute performance disaster,
333                                  // remove the barriers entirely.
334                                  // If this is not here, there will be a lot of single draw render passes.
335                                  // TODO: explicit handling for feedback loops, likely outside this class.
336  
337                                  _queuedBarrierCount -= _imageBarriers.Count;
338                                  _imageBarriers.Clear();
339                              }
340                              else
341                              {
342                                  // TBDR GPUs are sensitive to barriers, so we need to end the pass to ensure the data is available.
343                                  // Metal already has hazard tracking so MVK doesn't need this.
344                                  endRenderPass();
345                                  inRenderPass = false;
346                              }
347                          }
348                      }
349                      else
350                      {
351                          // Generic pipeline memory barriers will work for desktop GPUs.
352                          // They do require a few more access flags on the subpass dependency, though.
353                          foreach (var barrier in _imageBarriers)
354                          {
355                              _memoryBarriers.Add(new BarrierWithStageFlags<MemoryBarrier, int>(
356                                  barrier.Flags,
357                                  new MemoryBarrier()
358                                  {
359                                      SType = StructureType.MemoryBarrier,
360                                      SrcAccessMask = barrier.Barrier.SrcAccessMask,
361                                      DstAccessMask = barrier.Barrier.DstAccessMask
362                                  }));
363                          }
364  
365                          _imageBarriers.Clear();
366                      }
367                  }
368  
369                  if (inRenderPass && _memoryBarriers.Count > 0)
370                  {
371                      PipelineStageFlags allFlags = PipelineStageFlags.None;
372  
373                      foreach (var barrier in _memoryBarriers)
374                      {
375                          allFlags |= barrier.Flags.Dest;
376                      }
377  
378                      if (allFlags.HasFlag(PipelineStageFlags.DrawIndirectBit) || !_gd.SupportsRenderPassBarrier(allFlags))
379                      {
380                          endRenderPass();
381                          inRenderPass = false;
382                      }
383                  }
384  
385                  AddBarriers(_memoryBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref memoryCount, _memoryBarriers);
386                  AddBarriers(_bufferBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref bufferCount, _bufferBarriers);
387                  AddBarriers(_imageBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref imageCount, _imageBarriers);
388  
389                  if (hasBarrier)
390                  {
391                      PipelineStageFlags srcStageFlags = flags.Source;
392  
393                      if (inRenderPass)
394                      {
395                          // Inside a render pass, barrier stages can only be from rasterization.
396                          srcStageFlags &= ~PipelineStageFlags.ComputeShaderBit;
397                      }
398  
399                      _gd.Api.CmdPipelineBarrier(
400                          cbs.CommandBuffer,
401                          srcStageFlags,
402                          flags.Dest,
403                          0,
404                          (uint)memoryCount,
405                          _memoryBarrierBatch.Pointer,
406                          (uint)bufferCount,
407                          _bufferBarrierBatch.Pointer,
408                          (uint)imageCount,
409                          _imageBarrierBatch.Pointer);
410                  }
411              }
412          }
413  
414          private void QueueIncoherentBarrier(IncoherentBarrierType type)
415          {
416              if (type > _queuedIncoherentBarrier)
417              {
418                  _queuedIncoherentBarrier = type;
419              }
420  
421              _queuedFeedbackLoopBarrier = true;
422          }
423  
424          public void QueueTextureBarrier()
425          {
426              QueueIncoherentBarrier(IncoherentBarrierType.Texture);
427          }
428  
429          public void QueueMemoryBarrier()
430          {
431              QueueIncoherentBarrier(IncoherentBarrierType.All);
432          }
433  
434          public void QueueCommandBufferBarrier()
435          {
436              QueueIncoherentBarrier(IncoherentBarrierType.CommandBuffer);
437          }
438  
439          public void EnableTfbBarriers(bool enable)
440          {
441              if (enable)
442              {
443                  _extraStages |= PipelineStageFlags.TransformFeedbackBitExt;
444              }
445              else
446              {
447                  _extraStages &= ~PipelineStageFlags.TransformFeedbackBitExt;
448              }
449          }
450  
451          public void Dispose()
452          {
453              _memoryBarrierBatch.Dispose();
454              _bufferBarrierBatch.Dispose();
455              _imageBarrierBatch.Dispose();
456          }
457      }
458  }