// BarrierBatch.cs
using Silk.NET.Vulkan;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Vulkan
{
    /// <summary>
    /// Collects Vulkan pipeline barriers (memory, buffer and image barriers) and flushes them
    /// with batched vkCmdPipelineBarrier calls, merging consecutive queued barriers that share
    /// the same source/destination stage flags. Also tracks potentially incoherent shader
    /// writes so that a single covering memory barrier can be emitted lazily only when one is
    /// actually required.
    /// </summary>
    internal class BarrierBatch : IDisposable
    {
        // Maximum number of barriers of each kind passed to a single vkCmdPipelineBarrier call;
        // also the capacity of the native staging arrays below.
        private const int MaxBarriersPerCall = 16;

        // Access masks used to build the catch-all memory barrier for incoherent shader writes.
        private const AccessFlags BaseAccess = AccessFlags.ShaderReadBit | AccessFlags.ShaderWriteBit;
        private const AccessFlags BufferAccess = AccessFlags.IndexReadBit | AccessFlags.VertexAttributeReadBit | AccessFlags.UniformReadBit;
        private const AccessFlags CommandBufferAccess = AccessFlags.IndirectCommandReadBit;

        private readonly VulkanRenderer _gd;

        // Unmanaged staging arrays whose pointers are handed directly to vkCmdPipelineBarrier
        // on each flush iteration.
        private readonly NativeArray<MemoryBarrier> _memoryBarrierBatch = new(MaxBarriersPerCall);
        private readonly NativeArray<BufferMemoryBarrier> _bufferBarrierBatch = new(MaxBarriersPerCall);
        private readonly NativeArray<ImageMemoryBarrier> _imageBarrierBatch = new(MaxBarriersPerCall);

        // Pending barriers, each paired with its stage flags. The second type argument is an
        // optional resource tag (the owning texture for image barriers); a plain int is used
        // where no resource needs to be tracked.
        private readonly List<BarrierWithStageFlags<MemoryBarrier, int>> _memoryBarriers = new();
        private readonly List<BarrierWithStageFlags<BufferMemoryBarrier, int>> _bufferBarriers = new();
        private readonly List<BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage>> _imageBarriers = new();

        // Total number of barriers currently queued across all three lists.
        private int _queuedBarrierCount;

        // Kinds of pending incoherent-write barrier, ordered by strength: a stronger request
        // supersedes a weaker one (see QueueIncoherentBarrier).
        private enum IncoherentBarrierType
        {
            None,
            Texture,
            All,
            CommandBuffer
        }

        private bool _feedbackLoopActive;
        // Stages that may have performed incoherent buffer/texture writes since the last
        // covering barrier was emitted.
        private PipelineStageFlags _incoherentBufferWriteStages;
        private PipelineStageFlags _incoherentTextureWriteStages;
        // Extra stages (transform feedback, toggled by EnableTfbBarriers) folded into the
        // tracked buffer write stages.
        private PipelineStageFlags _extraStages;
        private IncoherentBarrierType _queuedIncoherentBarrier;
        private bool _queuedFeedbackLoopBarrier;

        public BarrierBatch(VulkanRenderer gd)
        {
            _gd = gd;
        }

        /// <summary>
        /// Returns a superset of the access and stage flags that barriers flushed inside a
        /// render pass may use, for building subpass self-dependencies ahead of time.
        /// </summary>
        public static (AccessFlags Access, PipelineStageFlags Stages) GetSubpassAccessSuperset(VulkanRenderer gd)
        {
            AccessFlags access = BufferAccess;
            PipelineStageFlags stages = PipelineStageFlags.AllGraphicsBit;

            if (gd.TransformFeedbackApi != null)
            {
                access |= AccessFlags.TransformFeedbackWriteBitExt;
                stages |= PipelineStageFlags.TransformFeedbackBitExt;
            }

            return (access, stages);
        }

        // Source/destination stage pair used as the key when coalescing queued barriers into
        // a single vkCmdPipelineBarrier call.
        private readonly record struct StageFlags : IEquatable<StageFlags>
        {
            public readonly PipelineStageFlags Source;
            public readonly PipelineStageFlags Dest;

            public StageFlags(PipelineStageFlags source, PipelineStageFlags dest)
            {
                Source = source;
                Dest = dest;
            }
        }

        // A queued barrier together with its stage flags and an optional owning resource.
        private readonly struct BarrierWithStageFlags<T, T2> where T : unmanaged
        {
            public readonly StageFlags Flags;
            public readonly T Barrier;
            public readonly T2 Resource;

            public BarrierWithStageFlags(StageFlags flags, T barrier)
            {
                Flags = flags;
                Barrier = barrier;
                Resource = default;
            }

            public BarrierWithStageFlags(PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags, T barrier, T2 resource)
            {
                Flags = new StageFlags(srcStageFlags, dstStageFlags);
                Barrier = barrier;
                Resource = resource;
            }
        }

        // Shared helper: appends to the given list and keeps the global queued count in sync.
        private void QueueBarrier<T, T2>(List<BarrierWithStageFlags<T, T2>> list, T barrier, T2 resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) where T : unmanaged
        {
            list.Add(new BarrierWithStageFlags<T, T2>(srcStageFlags, dstStageFlags, barrier, resource));
            _queuedBarrierCount++;
        }

        /// <summary>Queues a global memory barrier to be emitted on the next flush.</summary>
        public void QueueBarrier(MemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
        {
            QueueBarrier(_memoryBarriers, barrier, default, srcStageFlags, dstStageFlags);
        }

        /// <summary>Queues a buffer memory barrier to be emitted on the next flush.</summary>
        public void QueueBarrier(BufferMemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
        {
            QueueBarrier(_bufferBarriers, barrier, default, srcStageFlags, dstStageFlags);
        }

        /// <summary>
        /// Queues an image memory barrier to be emitted on the next flush, tagged with the
        /// texture it applies to so render-pass attachment checks can be made at flush time.
        /// </summary>
        public void QueueBarrier(ImageMemoryBarrier barrier, TextureStorage resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags)
        {
            QueueBarrier(_imageBarriers, barrier, resource, srcStageFlags, dstStageFlags);
        }

        /// <summary>
        /// Converts any pending incoherent-write or feedback-loop request into a concrete
        /// queued memory barrier, but only if the tracked write stages show one is needed.
        /// Also reseeds write-stage tracking from the given program.
        /// </summary>
        /// <param name="program">Currently bound program, used to reseed the tracked write stages; may be null.</param>
        /// <param name="inRenderPass">True if a render pass is active, which limits stages to graphics.</param>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public unsafe void FlushMemoryBarrier(ShaderCollection program, bool inRenderPass)
        {
            if (_queuedIncoherentBarrier > IncoherentBarrierType.None)
            {
                // We should emit a memory barrier if there's a write access in the program (current program, or program since last barrier)
                bool hasTextureWrite = _incoherentTextureWriteStages != PipelineStageFlags.None;
                bool hasBufferWrite = _incoherentBufferWriteStages != PipelineStageFlags.None;
                bool hasBufferBarrier = _queuedIncoherentBarrier > IncoherentBarrierType.Texture;

                if (hasTextureWrite || (hasBufferBarrier && hasBufferWrite))
                {
                    // Source and destination access masks are identical: the barrier covers
                    // writes-before against reads/writes-after for all tracked access kinds.
                    AccessFlags access = BaseAccess;

                    PipelineStageFlags stages = inRenderPass ? PipelineStageFlags.AllGraphicsBit : PipelineStageFlags.AllCommandsBit;

                    if (hasBufferBarrier && hasBufferWrite)
                    {
                        access |= BufferAccess;

                        if (_gd.TransformFeedbackApi != null)
                        {
                            access |= AccessFlags.TransformFeedbackWriteBitExt;
                            stages |= PipelineStageFlags.TransformFeedbackBitExt;
                        }
                    }

                    if (_queuedIncoherentBarrier == IncoherentBarrierType.CommandBuffer)
                    {
                        // Also make the writes visible to indirect draw parameter reads.
                        access |= CommandBufferAccess;
                        stages |= PipelineStageFlags.DrawIndirectBit;
                    }

                    MemoryBarrier barrier = new MemoryBarrier()
                    {
                        SType = StructureType.MemoryBarrier,
                        SrcAccessMask = access,
                        DstAccessMask = access
                    };

                    QueueBarrier(barrier, stages, stages);

                    // The queued barrier covers everything written so far; restart tracking
                    // from the current program's write stages only.
                    _incoherentTextureWriteStages = program?.IncoherentTextureWriteStages ?? PipelineStageFlags.None;

                    if (_queuedIncoherentBarrier > IncoherentBarrierType.Texture)
                    {
                        if (program != null)
                        {
                            _incoherentBufferWriteStages = program.IncoherentBufferWriteStages | _extraStages;
                        }
                        else
                        {
                            _incoherentBufferWriteStages = PipelineStageFlags.None;
                        }
                    }

                    _queuedIncoherentBarrier = IncoherentBarrierType.None;
                    _queuedFeedbackLoopBarrier = false;
                }
                else if (_feedbackLoopActive && _queuedFeedbackLoopBarrier)
                {
                    // Feedback loop barrier.

                    MemoryBarrier barrier = new MemoryBarrier()
                    {
                        SType = StructureType.MemoryBarrier,
                        SrcAccessMask = AccessFlags.ShaderWriteBit,
                        DstAccessMask = AccessFlags.ShaderReadBit
                    };

                    QueueBarrier(barrier, PipelineStageFlags.FragmentShaderBit, PipelineStageFlags.AllGraphicsBit);

                    _queuedFeedbackLoopBarrier = false;
                }

                _feedbackLoopActive = false;
            }
        }

        /// <summary>Flushes all queued barriers without a program state update.</summary>
        public unsafe void Flush(CommandBufferScoped cbs, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
        {
            Flush(cbs, null, false, inRenderPass, rpHolder, endRenderPass);
        }

        /// <summary>
        /// Flushes all queued barriers into vkCmdPipelineBarrier calls. Each call batches up to
        /// MaxBarriersPerCall barriers of each kind that share identical stage flags; the loop
        /// repeats until nothing is queued. May end the active render pass when a queued
        /// barrier cannot legally or efficiently be emitted inside one.
        /// </summary>
        /// <param name="cbs">Command buffer the barriers are recorded into.</param>
        /// <param name="program">Currently bound program whose write stages are accumulated; may be null.</param>
        /// <param name="feedbackLoopActive">True if a render feedback loop is currently active.</param>
        /// <param name="inRenderPass">True if a render pass is currently active.</param>
        /// <param name="rpHolder">Holder for the current/pending render pass; may be null when not beginning a pass.</param>
        /// <param name="endRenderPass">Callback that ends the current render pass when barriers demand it.</param>
        public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool feedbackLoopActive, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
        {
            if (program != null)
            {
                _incoherentBufferWriteStages |= program.IncoherentBufferWriteStages | _extraStages;
                _incoherentTextureWriteStages |= program.IncoherentTextureWriteStages;
            }

            _feedbackLoopActive |= feedbackLoopActive;

            FlushMemoryBarrier(program, inRenderPass);

            if (!inRenderPass && rpHolder != null)
            {
                // Render pass is about to begin. Queue any fences that normally interrupt the pass.
                rpHolder.InsertForcedFences(cbs);
            }

            while (_queuedBarrierCount > 0)
            {
                int memoryCount = 0;
                int bufferCount = 0;
                int imageCount = 0;

                bool hasBarrier = false;
                StageFlags flags = default;

                // Copies queued barriers matching the first encountered stage-flag pair into
                // the staging span and removes the consumed entries from the list. Stops early
                // when the span is full; leftovers are picked up by the next while iteration.
                static void AddBarriers<T, T2>(
                    Span<T> target,
                    ref int queuedBarrierCount,
                    ref bool hasBarrier,
                    ref StageFlags flags,
                    ref int count,
                    List<BarrierWithStageFlags<T, T2>> list) where T : unmanaged
                {
                    // firstMatch/end delimit the current contiguous run of consumed entries,
                    // so they can be removed with a single RemoveRange.
                    int firstMatch = -1;
                    int end = list.Count;

                    for (int i = 0; i < list.Count; i++)
                    {
                        BarrierWithStageFlags<T, T2> barrier = list[i];

                        if (!hasBarrier)
                        {
                            // First barrier of this call determines the stage flags every
                            // other batched barrier must match.
                            flags = barrier.Flags;
                            hasBarrier = true;

                            target[count++] = barrier.Barrier;
                            queuedBarrierCount--;
                            firstMatch = i;

                            if (count >= target.Length)
                            {
                                end = i + 1;
                                break;
                            }
                        }
                        else
                        {
                            if (flags.Equals(barrier.Flags))
                            {
                                target[count++] = barrier.Barrier;
                                queuedBarrierCount--;

                                if (firstMatch == -1)
                                {
                                    firstMatch = i;
                                }

                                if (count >= target.Length)
                                {
                                    end = i + 1;
                                    break;
                                }
                            }
                            else
                            {
                                // Delete consumed barriers from the first match to the current non-match.
                                if (firstMatch != -1)
                                {
                                    int deleteCount = i - firstMatch;
                                    list.RemoveRange(firstMatch, deleteCount);
                                    i -= deleteCount;

                                    firstMatch = -1;
                                    end = list.Count;
                                }
                            }
                        }
                    }

                    // Remove the trailing run of consumed entries (Clear is the fast path
                    // when everything from the start was consumed).
                    if (firstMatch == 0 && end == list.Count)
                    {
                        list.Clear();
                    }
                    else if (firstMatch != -1)
                    {
                        int deleteCount = end - firstMatch;

                        list.RemoveRange(firstMatch, deleteCount);
                    }
                }

                if (inRenderPass && _imageBarriers.Count > 0)
                {
                    // Image barriers queued in the batch are meant to be globally scoped,
                    // but inside a render pass they're scoped to just the range of the render pass.

                    // On MoltenVK, we just break the rules and always use image barrier.
                    // On desktop GPUs, all barriers are globally scoped, so we just replace it with a generic memory barrier.
                    // Generally, we want to avoid this from happening in the future, so flag the texture to immediately
                    // emit a barrier whenever the current render pass is bound again.

                    bool anyIsNonAttachment = false;

                    foreach (BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage> barrier in _imageBarriers)
                    {
                        // If the binding is an attachment, don't add it as a forced fence.
                        bool isAttachment = rpHolder.ContainsAttachment(barrier.Resource);

                        if (!isAttachment)
                        {
                            rpHolder.AddForcedFence(barrier.Resource, barrier.Flags.Dest);
                            anyIsNonAttachment = true;
                        }
                    }

                    if (_gd.IsTBDR)
                    {
                        if (!_gd.IsMoltenVk)
                        {
                            if (!anyIsNonAttachment)
                            {
                                // This case is a feedback loop. To prevent this from causing an absolute performance disaster,
                                // remove the barriers entirely.
                                // If this is not here, there will be a lot of single draw render passes.
                                // TODO: explicit handling for feedback loops, likely outside this class.

                                _queuedBarrierCount -= _imageBarriers.Count;
                                _imageBarriers.Clear();
                            }
                            else
                            {
                                // TBDR GPUs are sensitive to barriers, so we need to end the pass to ensure the data is available.
                                // Metal already has hazard tracking so MVK doesn't need this.
                                endRenderPass();
                                inRenderPass = false;
                            }
                        }
                    }
                    else
                    {
                        // Generic pipeline memory barriers will work for desktop GPUs.
                        // They do require a few more access flags on the subpass dependency, though.
                        foreach (var barrier in _imageBarriers)
                        {
                            _memoryBarriers.Add(new BarrierWithStageFlags<MemoryBarrier, int>(
                                barrier.Flags,
                                new MemoryBarrier()
                                {
                                    SType = StructureType.MemoryBarrier,
                                    SrcAccessMask = barrier.Barrier.SrcAccessMask,
                                    DstAccessMask = barrier.Barrier.DstAccessMask
                                }));
                        }

                        _imageBarriers.Clear();
                    }
                }

                if (inRenderPass && _memoryBarriers.Count > 0)
                {
                    // Check whether every queued memory barrier can be emitted inside the
                    // pass; indirect draw reads or unsupported stages force the pass to end.
                    PipelineStageFlags allFlags = PipelineStageFlags.None;

                    foreach (var barrier in _memoryBarriers)
                    {
                        allFlags |= barrier.Flags.Dest;
                    }

                    if (allFlags.HasFlag(PipelineStageFlags.DrawIndirectBit) || !_gd.SupportsRenderPassBarrier(allFlags))
                    {
                        endRenderPass();
                        inRenderPass = false;
                    }
                }

                // hasBarrier/flags are shared across the three calls, so all barriers emitted
                // in this iteration share one source/destination stage pair.
                AddBarriers(_memoryBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref memoryCount, _memoryBarriers);
                AddBarriers(_bufferBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref bufferCount, _bufferBarriers);
                AddBarriers(_imageBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref imageCount, _imageBarriers);

                if (hasBarrier)
                {
                    PipelineStageFlags srcStageFlags = flags.Source;

                    if (inRenderPass)
                    {
                        // Inside a render pass, barrier stages can only be from rasterization.
                        srcStageFlags &= ~PipelineStageFlags.ComputeShaderBit;
                    }

                    _gd.Api.CmdPipelineBarrier(
                        cbs.CommandBuffer,
                        srcStageFlags,
                        flags.Dest,
                        0,
                        (uint)memoryCount,
                        _memoryBarrierBatch.Pointer,
                        (uint)bufferCount,
                        _bufferBarrierBatch.Pointer,
                        (uint)imageCount,
                        _imageBarrierBatch.Pointer);
                }
            }
        }

        // Records a pending incoherent barrier request; only upgraded, never downgraded,
        // so the strongest requested type wins until the next FlushMemoryBarrier.
        private void QueueIncoherentBarrier(IncoherentBarrierType type)
        {
            if (type > _queuedIncoherentBarrier)
            {
                _queuedIncoherentBarrier = type;
            }

            _queuedFeedbackLoopBarrier = true;
        }

        /// <summary>Requests a barrier covering incoherent texture writes.</summary>
        public void QueueTextureBarrier()
        {
            QueueIncoherentBarrier(IncoherentBarrierType.Texture);
        }

        /// <summary>Requests a barrier covering incoherent texture and buffer writes.</summary>
        public void QueueMemoryBarrier()
        {
            QueueIncoherentBarrier(IncoherentBarrierType.All);
        }

        /// <summary>Requests the strongest barrier, also covering indirect command reads.</summary>
        public void QueueCommandBufferBarrier()
        {
            QueueIncoherentBarrier(IncoherentBarrierType.CommandBuffer);
        }

        /// <summary>
        /// Enables or disables treating transform feedback as an extra incoherent buffer
        /// write stage for subsequent barrier tracking.
        /// </summary>
        public void EnableTfbBarriers(bool enable)
        {
            if (enable)
            {
                _extraStages |= PipelineStageFlags.TransformFeedbackBitExt;
            }
            else
            {
                _extraStages &= ~PipelineStageFlags.TransformFeedbackBitExt;
            }
        }

        /// <summary>Releases the unmanaged staging arrays.</summary>
        public void Dispose()
        {
            _memoryBarrierBatch.Dispose();
            _bufferBarrierBatch.Dispose();
            _imageBarrierBatch.Dispose();
        }
    }
}