ThreadedRenderer.cs
using Ryujinx.Common;
using Ryujinx.Common.Configuration;
using Ryujinx.Graphics.GAL.Multithreading.Commands;
using Ryujinx.Graphics.GAL.Multithreading.Commands.Buffer;
using Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer;
using Ryujinx.Graphics.GAL.Multithreading.Model;
using Ryujinx.Graphics.GAL.Multithreading.Resources;
using Ryujinx.Graphics.GAL.Multithreading.Resources.Programs;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;

namespace Ryujinx.Graphics.GAL.Multithreading
{
    /// <summary>
    /// The ThreadedRenderer is a layer that can be put in front of any Renderer backend to make
    /// its processing happen on a separate thread, rather than intertwined with the GPU emulation.
    /// A new thread is created to handle the GPU command processing, separate from the renderer thread.
    /// Calls to the renderer, pipeline and resources are queued to happen on the renderer thread.
    /// </summary>
    /// <remarks>
    /// Commands are written into a fixed-size ring of <see cref="QueueCount"/> slots by the producer
    /// (see <see cref="New{T}"/> and <see cref="QueueCommand"/>) and executed in order by
    /// <see cref="RenderLoop"/> on the thread that called <see cref="RunLoop"/>.
    /// </remarks>
    public class ThreadedRenderer : IRenderer
    {
        // Size passed to the CircularSpanPool constructor (in bytes, going by the name).
        private const int SpanPoolBytes = 4 * 1024 * 1024;

        // The reference table holds this many entries per command slot.
        private const int MaxRefsPerCommand = 2;

        // Number of slots in the command ring.
        private const int QueueCount = 10000;

        // Size in bytes of one command slot: the maximum command size, aligned up to 4.
        private readonly int _elementSize;
        private readonly IRenderer _baseRenderer;
        private Thread _gpuThread;
        private Thread _backendThread;
        private bool _running;

        // Starts signaled; waited on by WaitForFrame and signaled by SignalFrame.
        private readonly AutoResetEvent _frameComplete = new(true);

        // Wakes the render loop when commands or an interrupt action are pending.
        private readonly ManualResetEventSlim _galWorkAvailable;
        private readonly CircularSpanPool _spanPool;

        // Set by the render loop once the command recorded in _invokePtr has run.
        private readonly ManualResetEventSlim _invokeRun;

        // Set by the render loop once a pending interrupt action has run.
        private readonly AutoResetEvent _interruptRun;

        // True initially and after ResetCounter; false after a SamplesPassed counter is reported.
        private bool _lastSampleCounterClear = true;

        // Backing storage of the command ring: QueueCount slots of _elementSize bytes each.
        private readonly byte[] _commandQueue;

        // Ring of object references used by commands (see AddTableRef / RemoveTableRef).
        private readonly object[] _refQueue;

        // Slot the render loop will run next.
        private int _consumerPtr;

        // Number of queued commands that have not finished running yet.
        private int _commandCount;

        // Slot the next call to New<T> will hand out.
        private int _producerPtr;

        // Slot handed out by the most recent call to New<T>.
        private int _lastProducedPtr;

        // Slot whose completion must signal _invokeRun; the render loop swaps it to -1 when it does.
        private int _invokePtr;

        private int _refProducerPtr;
        private int _refConsumerPtr;

        // Action waiting to be run by the render loop on behalf of Interrupt; null when none is pending.
        private Action _interruptAction;

        // Serializes callers of Interrupt so that only one action is pending at a time.
        private readonly object _interruptLock = new();

        /// <summary>
        /// Raised whenever the base renderer raises its own ScreenCaptured event.
        /// The sender passed to handlers is this instance rather than the base renderer.
        /// </summary>
        public event EventHandler<ScreenCaptureImageInfo> ScreenCaptured;

        internal BufferMap Buffers { get; }
        internal SyncMap Sync { get; }
        internal CircularSpanPool SpanPool { get; }
        internal ProgramQueue Programs { get; }

        /// <summary>
        /// Threaded pipeline wrapper created for this renderer.
        /// </summary>
        public IPipeline Pipeline { get; }

        /// <summary>
        /// Threaded window wrapper created for this renderer and the base renderer.
        /// </summary>
        public IWindow Window { get; }

        /// <summary>
        /// The renderer that this instance wraps.
        /// </summary>
        public IRenderer BaseRenderer => _baseRenderer;

        /// <summary>
        /// Forwards the base renderer's threading preference.
        /// </summary>
        public bool PreferThreading => _baseRenderer.PreferThreading;

        /// <summary>
        /// Creates a threaded wrapper around the given renderer.
        /// </summary>
        /// <param name="renderer">Renderer backend that queued commands are executed against</param>
        public ThreadedRenderer(IRenderer renderer)
        {
            _baseRenderer = renderer;

            // Re-raise screen captures from the backend with this instance as the sender.
            renderer.ScreenCaptured += (sender, info) => ScreenCaptured?.Invoke(this, info);
            renderer.SetInterruptAction(Interrupt);

            Pipeline = new ThreadedPipeline(this);
            Window = new ThreadedWindow(this, renderer);
            Buffers = new BufferMap();
            Sync = new SyncMap();
            Programs = new ProgramQueue(renderer);

            _galWorkAvailable = new ManualResetEventSlim(false);
            _invokeRun = new ManualResetEventSlim();
            _interruptRun = new AutoResetEvent(false);
            _spanPool = new CircularSpanPool(this, SpanPoolBytes);
            SpanPool = _spanPool;

            _elementSize = BitUtils.AlignUp(CommandHelper.GetMaxCommandSize(), 4);

            _commandQueue = new byte[_elementSize * QueueCount];
            _refQueue = new object[MaxRefsPerCommand * QueueCount];
        }

        /// <summary>
        /// Starts <paramref name="gpuLoop"/> on a new thread named "GPU.MainThread", then runs
        /// <see cref="RenderLoop"/> on the calling thread, which is recorded as the backend thread.
        /// </summary>
        /// <param name="gpuLoop">Entry point of the GPU thread</param>
        public void RunLoop(ThreadStart gpuLoop)
        {
            _running = true;

            _backendThread = Thread.CurrentThread;

            _gpuThread = new Thread(gpuLoop)
            {
                Name = "GPU.MainThread",
            };

            _gpuThread.Start();

            RenderLoop();
        }

        /// <summary>
        /// Runs queued commands and pending interrupt actions on the calling thread
        /// for as long as the renderer is marked as running.
        /// </summary>
        public void RenderLoop()
        {
            // Power through the render queue until the Gpu thread work is done.

            while (_running)
            {
                _galWorkAvailable.Wait();
                _galWorkAvailable.Reset();

                // Read the pending action once, so that the value that was null-checked is the one invoked.
                Action interruptAction = Volatile.Read(ref _interruptAction);

                if (interruptAction != null)
                {
                    interruptAction();
                    _interruptRun.Set();

                    // Clearing the field is what allows the next Interrupt caller to install its action.
                    Interlocked.Exchange(ref _interruptAction, null);
                }

                // The other thread can only increase the command count.
                // We can assume that if it is above 0, it will stay there or get higher.

                while (Volatile.Read(ref _commandCount) > 0 && Volatile.Read(ref _interruptAction) == null)
                {
                    int commandPtr = _consumerPtr;

                    Span<byte> command = new(_commandQueue, commandPtr * _elementSize, _elementSize);

                    // Run the command.

                    CommandHelper.RunCommand(command, this, _baseRenderer);

                    // If a caller of InvokeCommand is waiting on this exact slot, release it.
                    if (Interlocked.CompareExchange(ref _invokePtr, -1, commandPtr) == commandPtr)
                    {
                        _invokeRun.Set();
                    }

                    _consumerPtr = (_consumerPtr + 1) % QueueCount;

                    Interlocked.Decrement(ref _commandCount);
                }
            }
        }

        /// <summary>
        /// Inserts the given data into the span pool.
        /// </summary>
        /// <typeparam name="T">Element type of the span</typeparam>
        /// <param name="data">Data to insert</param>
        /// <returns>The reference returned by the span pool for the inserted data</returns>
        internal SpanRef<T> CopySpan<T>(ReadOnlySpan<T> data) where T : unmanaged
        {
            return _spanPool.Insert(data);
        }

        /// <summary>
        /// Wraps an object in a <see cref="TableRef{T}"/> bound to this renderer.
        /// </summary>
        /// <typeparam name="T">Type of the referenced object</typeparam>
        /// <param name="reference">Object to wrap</param>
        /// <returns>The table reference</returns>
        private TableRef<T> Ref<T>(T reference)
        {
            return new TableRef<T>(this, reference);
        }

        /// <summary>
        /// Allocates the next slot of the command ring and returns it reinterpreted as <typeparamref name="T"/>.
        /// Blocks (sleeping in 1ms steps) while the ring is full.
        /// The command is not visible to the render loop until <see cref="QueueCommand"/> is called.
        /// </summary>
        /// <typeparam name="T">Command struct type</typeparam>
        /// <returns>A reference to the command stored inside the ring slot</returns>
        internal ref T New<T>() where T : struct
        {
            while (_producerPtr == (Volatile.Read(ref _consumerPtr) + QueueCount - 1) % QueueCount)
            {
                // If incrementing the producer pointer would overflow, we need to wait.
                // _consumerPtr can only move forward, so there's no race to worry about here.

                Thread.Sleep(1);
            }

            int taken = _producerPtr;
            _lastProducedPtr = taken;

            _producerPtr = (_producerPtr + 1) % QueueCount;

            Span<byte> memory = new(_commandQueue, taken * _elementSize, _elementSize);
            ref T result = ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(memory));

            // The last byte of the slot carries the command type.
            // NOTE(review): the slot still holds whatever bytes were there before, so this presumably
            // relies on CommandType depending only on T and not on instance data - confirm.
            memory[^1] = (byte)((IGALCommand)result).CommandType;

            return ref result;
        }

        /// <summary>
        /// Stores an object in the next entry of the reference ring.
        /// </summary>
        /// <param name="obj">Object to store</param>
        /// <returns>Index of the entry the object was stored in</returns>
        internal int AddTableRef(object obj)
        {
            // The reference table is sized so that it will never overflow, so long as the references are taken after the command is allocated.

            int index = _refProducerPtr;

            _refQueue[index] = obj;

            _refProducerPtr = (_refProducerPtr + 1) % _refQueue.Length;

            return index;
        }

        /// <summary>
        /// Takes the oldest remaining object out of the reference ring and clears its entry.
        /// References must be removed in the same order they were added.
        /// </summary>
        /// <param name="index">Index returned by <see cref="AddTableRef"/>; only checked by a debug assertion</param>
        /// <returns>The stored object</returns>
        internal object RemoveTableRef(int index)
        {
            Debug.Assert(index == _refConsumerPtr);

            object result = _refQueue[_refConsumerPtr];
            _refQueue[_refConsumerPtr] = null;

            _refConsumerPtr = (_refConsumerPtr + 1) % _refQueue.Length;

            return result;
        }

        /// <summary>
        /// Publishes one more command to the render loop, waking it if the queue was empty.
        /// </summary>
        internal void QueueCommand()
        {
            int result = Interlocked.Increment(ref _commandCount);

            // Only the transition from empty to non-empty needs to wake the render loop.
            if (result == 1)
            {
                _galWorkAvailable.Set();
            }
        }

        /// <summary>
        /// Publishes the most recently allocated command and blocks until the render loop has run it.
        /// </summary>
        internal void InvokeCommand()
        {
            _invokeRun.Reset();
            _invokePtr = _lastProducedPtr;

            QueueCommand();

            // Wait for the command to complete.
            _invokeRun.Wait();
        }

        /// <summary>
        /// Blocks until the frame completion event is signaled. The event starts out signaled.
        /// </summary>
        internal void WaitForFrame()
        {
            _frameComplete.WaitOne();
        }

        /// <summary>
        /// Signals the frame completion event.
        /// </summary>
        internal void SignalFrame()
        {
            _frameComplete.Set();
        }

        /// <summary>
        /// Checks if the calling thread is the GPU thread started by <see cref="RunLoop"/>.
        /// </summary>
        /// <returns>True if called from the GPU thread, false otherwise</returns>
        internal bool IsGpuThread()
        {
            return Thread.CurrentThread == _gpuThread;
        }

        /// <summary>
        /// Runs an action through the renderer. When called from the GPU thread without
        /// <paramref name="alwaysBackground"/>, the action is queued as a command and this call blocks
        /// until it has run. Otherwise it is forwarded to the base renderer with alwaysBackground set to true.
        /// </summary>
        /// <param name="action">Action to run</param>
        /// <param name="alwaysBackground">True to always forward to the base renderer, even on the GPU thread</param>
        public void BackgroundContextAction(Action action, bool alwaysBackground = false)
        {
            if (IsGpuThread() && !alwaysBackground)
            {
                // The action must be performed on the render thread.
                New<ActionCommand>().Set(Ref(action));
                InvokeCommand();
            }
            else
            {
                _baseRenderer.BackgroundContextAction(action, true);
            }
        }

        /// <summary>
        /// Allocates a buffer handle and queues creation of the buffer.
        /// </summary>
        /// <param name="size">Size forwarded to the creation command</param>
        /// <param name="access">Access flags forwarded to the creation command</param>
        /// <returns>The newly allocated handle, returned without waiting for the command to run</returns>
        public BufferHandle CreateBuffer(int size, BufferAccess access)
        {
            BufferHandle handle = Buffers.CreateBufferHandle();
            New<CreateBufferAccessCommand>().Set(handle, size, access);
            QueueCommand();

            return handle;
        }

        /// <summary>
        /// Allocates a buffer handle and queues creation of a host buffer.
        /// </summary>
        /// <param name="pointer">Pointer forwarded to the creation command</param>
        /// <param name="size">Size forwarded to the creation command</param>
        /// <returns>The newly allocated handle, returned without waiting for the command to run</returns>
        public BufferHandle CreateBuffer(nint pointer, int size)
        {
            BufferHandle handle = Buffers.CreateBufferHandle();
            New<CreateHostBufferCommand>().Set(handle, pointer, size);
            QueueCommand();

            return handle;
        }

        /// <summary>
        /// Allocates a buffer handle and queues creation of a sparse buffer.
        /// The ranges are copied into the span pool before the command is queued.
        /// </summary>
        /// <param name="storageBuffers">Ranges forwarded to the creation command</param>
        /// <returns>The newly allocated handle, returned without waiting for the command to run</returns>
        public BufferHandle CreateBufferSparse(ReadOnlySpan<BufferRange> storageBuffers)
        {
            BufferHandle handle = Buffers.CreateBufferHandle();
            New<CreateBufferSparseCommand>().Set(handle, CopySpan(storageBuffers));
            QueueCommand();

            return handle;
        }

        /// <summary>
        /// Creates a threaded image array and queues its creation command.
        /// </summary>
        /// <param name="size">Size forwarded to the creation command</param>
        /// <param name="isBuffer">Flag forwarded to the creation command</param>
        /// <returns>The threaded image array, returned without waiting for the command to run</returns>
        public IImageArray CreateImageArray(int size, bool isBuffer)
        {
            var imageArray = new ThreadedImageArray(this);
            New<CreateImageArrayCommand>().Set(Ref(imageArray), size, isBuffer);
            QueueCommand();

            return imageArray;
        }

        /// <summary>
        /// Creates a threaded program from shader sources. The request is added to the program queue
        /// and a creation command is queued.
        /// </summary>
        /// <param name="shaders">Shader sources for the program</param>
        /// <param name="info">Shader info for the program</param>
        /// <returns>The threaded program, returned without waiting for the command to run</returns>
        public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
        {
            var program = new ThreadedProgram(this);

            SourceProgramRequest request = new(program, shaders, info);

            Programs.Add(request);

            New<CreateProgramCommand>().Set(Ref((IProgramRequest)request));
            QueueCommand();

            return program;
        }

        /// <summary>
        /// Creates a threaded sampler and queues its creation command.
        /// </summary>
        /// <param name="info">Sampler creation info forwarded to the command</param>
        /// <returns>The threaded sampler, returned without waiting for the command to run</returns>
        public ISampler CreateSampler(SamplerCreateInfo info)
        {
            var sampler = new ThreadedSampler(this);
            New<CreateSamplerCommand>().Set(Ref(sampler), info);
            QueueCommand();

            return sampler;
        }

        /// <summary>
        /// Registers a sync handle for the given id in the sync map and queues the sync creation command.
        /// </summary>
        /// <param name="id">Sync id</param>
        /// <param name="strict">Flag forwarded to the creation command</param>
        public void CreateSync(ulong id, bool strict)
        {
            Sync.CreateSyncHandle(id);
            New<CreateSyncCommand>().Set(id, strict);
            QueueCommand();
        }

        /// <summary>
        /// Creates a threaded texture. On the GPU thread the creation is queued as a command;
        /// on any other thread the base texture is created immediately on the base renderer.
        /// </summary>
        /// <param name="info">Texture creation info</param>
        /// <returns>The threaded texture</returns>
        public ITexture CreateTexture(TextureCreateInfo info)
        {
            if (IsGpuThread())
            {
                var texture = new ThreadedTexture(this, info);
                New<CreateTextureCommand>().Set(Ref(texture), info);
                QueueCommand();

                return texture;
            }
            else
            {
                var texture = new ThreadedTexture(this, info)
                {
                    Base = _baseRenderer.CreateTexture(info),
                };

                return texture;
            }
        }

        /// <summary>
        /// Creates a threaded texture array and queues its creation command.
        /// </summary>
        /// <param name="size">Size forwarded to the creation command</param>
        /// <param name="isBuffer">Flag forwarded to the creation command</param>
        /// <returns>The threaded texture array, returned without waiting for the command to run</returns>
        public ITextureArray CreateTextureArray(int size, bool isBuffer)
        {
            var textureArray = new ThreadedTextureArray(this);
            New<CreateTextureArrayCommand>().Set(Ref(textureArray), size, isBuffer);
            QueueCommand();

            return textureArray;
        }

        /// <summary>
        /// Queues disposal of the given buffer.
        /// </summary>
        /// <param name="buffer">Handle of the buffer to dispose</param>
        public void DeleteBuffer(BufferHandle buffer)
        {
            New<BufferDisposeCommand>().Set(buffer);
            QueueCommand();
        }

        /// <summary>
        /// Reads data back from a buffer. On the GPU thread the read is queued and this call blocks
        /// until it has run; on any other thread the handle is mapped through the buffer map and
        /// the base renderer is called directly.
        /// </summary>
        /// <param name="buffer">Handle of the buffer to read</param>
        /// <param name="offset">Offset forwarded to the read</param>
        /// <param name="size">Size forwarded to the read</param>
        /// <returns>The buffer data</returns>
        public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
        {
            if (IsGpuThread())
            {
                ResultBox<PinnedSpan<byte>> box = new();
                New<BufferGetDataCommand>().Set(buffer, offset, size, Ref(box));
                InvokeCommand();

                return box.Result;
            }
            else
            {
                return _baseRenderer.GetBufferData(Buffers.MapBufferBlocking(buffer), offset, size);
            }
        }

        /// <summary>
        /// Queues a capabilities query and blocks until the render loop has run it.
        /// </summary>
        /// <returns>The capabilities written to the result box by the command</returns>
        public Capabilities GetCapabilities()
        {
            ResultBox<Capabilities> box = new();
            New<GetCapabilitiesCommand>().Set(Ref(box));
            InvokeCommand();

            return box.Result;
        }

        /// <summary>
        /// Forwards directly to the base renderer, without going through the command queue.
        /// </summary>
        /// <returns>The value returned by the base renderer</returns>
        public ulong GetCurrentSync()
        {
            return _baseRenderer.GetCurrentSync();
        }

        /// <summary>
        /// Forwards directly to the base renderer, without going through the command queue.
        /// </summary>
        /// <returns>The value returned by the base renderer</returns>
        public HardwareInfo GetHardwareInfo()
        {
            return _baseRenderer.GetHardwareInfo();
        }

        /// <summary>
        /// Initialize the base renderer. Must be called on the render thread.
        /// </summary>
        /// <param name="logLevel">Log level to use</param>
        public void Initialize(GraphicsDebugLevel logLevel)
        {
            _baseRenderer.Initialize(logLevel);
        }

        /// <summary>
        /// Creates a threaded program from a program binary. The request is added to the program queue
        /// and a creation command is queued.
        /// </summary>
        /// <param name="programBinary">Program binary</param>
        /// <param name="hasFragmentShader">Flag forwarded to the program request</param>
        /// <param name="info">Shader info for the program</param>
        /// <returns>The threaded program, returned without waiting for the command to run</returns>
        public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info)
        {
            var program = new ThreadedProgram(this);

            BinaryProgramRequest request = new(program, programBinary, hasFragmentShader, info);
            Programs.Add(request);

            New<CreateProgramCommand>().Set(Ref((IProgramRequest)request));
            QueueCommand();

            return program;
        }

        /// <summary>
        /// Queues a pre-frame command.
        /// </summary>
        public void PreFrame()
        {
            New<PreFrameCommand>();
            QueueCommand();
        }

        /// <summary>
        /// Creates a threaded counter event and queues the command that reports the counter.
        /// </summary>
        /// <param name="type">Counter type</param>
        /// <param name="resultHandler">Handler forwarded to the command</param>
        /// <param name="divisor">Divisor forwarded to the command</param>
        /// <param name="hostReserved">Flag forwarded to the command</param>
        /// <returns>The threaded counter event, returned without waiting for the command to run</returns>
        public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler, float divisor, bool hostReserved)
        {
            ThreadedCounterEvent evt = new(this, type, _lastSampleCounterClear);
            New<ReportCounterCommand>().Set(Ref(evt), type, Ref(resultHandler), divisor, hostReserved);
            QueueCommand();

            if (type == CounterType.SamplesPassed)
            {
                _lastSampleCounterClear = false;
            }

            return evt;
        }

        /// <summary>
        /// Queues a counter reset. The "last sample counter clear" flag is set for any counter type.
        /// </summary>
        /// <param name="type">Counter type to reset</param>
        public void ResetCounter(CounterType type)
        {
            New<ResetCounterCommand>().Set(type);
            QueueCommand();
            _lastSampleCounterClear = true;
        }

        /// <summary>
        /// Forwards directly to the base renderer, without going through the command queue.
        /// </summary>
        public void Screenshot()
        {
            _baseRenderer.Screenshot();
        }

        /// <summary>
        /// Queues a buffer write. The data is copied into the span pool before the command is queued.
        /// </summary>
        /// <param name="buffer">Handle of the buffer to write</param>
        /// <param name="offset">Offset forwarded to the command</param>
        /// <param name="data">Data to write</param>
        public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
        {
            New<BufferSetDataCommand>().Set(buffer, offset, CopySpan(data));
            QueueCommand();
        }

        /// <summary>
        /// Queues a counter update command.
        /// </summary>
        public void UpdateCounters()
        {
            New<UpdateCountersCommand>();
            QueueCommand();
        }

        /// <summary>
        /// Waits on the sync map for the given id, then waits for the sync on the base renderer.
        /// </summary>
        /// <param name="id">Sync id</param>
        public void WaitSync(ulong id)
        {
            Sync.WaitSyncAvailability(id);

            _baseRenderer.WaitSync(id);
        }

        /// <summary>
        /// Runs an action on the backend thread and returns once it has run.
        /// Registered with the base renderer as its interrupt action.
        /// </summary>
        /// <param name="action">Action to run</param>
        private void Interrupt(Action action)
        {
            // Interrupt the backend thread from any external thread and invoke the given action.

            if (Thread.CurrentThread == _backendThread)
            {
                // If this is called from the backend thread, the action can run immediately.
                action();
            }
            else
            {
                lock (_interruptLock)
                {
                    // The render loop clears the previous action only after signaling its completion,
                    // so the slot may still be occupied for a moment. Back off instead of hammering
                    // the field; Sleep(1) is disabled as the wait is expected to be very short.
                    SpinWait spinner = new();

                    while (Interlocked.CompareExchange(ref _interruptAction, action, null) != null)
                    {
                        spinner.SpinOnce(sleep1Threshold: -1);
                    }

                    _galWorkAvailable.Set();

                    _interruptRun.WaitOne();
                }
            }
        }

        /// <summary>
        /// Does nothing; see the comment in the body.
        /// </summary>
        /// <param name="interruptAction">Ignored</param>
        public void SetInterruptAction(Action<Action> interruptAction)
        {
            // Threaded renderer ignores given interrupt action, as it provides its own to the child renderer.
        }

        /// <summary>
        /// Forwards directly to the base renderer, without going through the command queue.
        /// </summary>
        /// <param name="address">Address forwarded to the base renderer</param>
        /// <param name="size">Size forwarded to the base renderer</param>
        /// <returns>The value returned by the base renderer</returns>
        public bool PrepareHostMapping(nint address, ulong size)
        {
            return _baseRenderer.PrepareHostMapping(address, size);
        }

        /// <summary>
        /// Spins until the render loop has run every queued command.
        /// </summary>
        public void FlushThreadedCommands()
        {
            SpinWait wait = new();

            while (Volatile.Read(ref _commandCount) > 0)
            {
                wait.SpinOnce();
            }
        }

        /// <summary>
        /// Stops the render loop, waits for the GPU thread to exit, then disposes the base renderer,
        /// the synchronization events and the sync map.
        /// </summary>
        public void Dispose()
        {
            GC.SuppressFinalize(this);

            // Dispose must happen from the render thread, after all commands have completed.

            // Stop the GPU thread.
            _running = false;
            _galWorkAvailable.Set();

            if (_gpuThread != null && _gpuThread.IsAlive)
            {
                _gpuThread.Join();
            }

            // Dispose the renderer.
            _baseRenderer.Dispose();

            // Dispose events.
            _frameComplete.Dispose();
            _galWorkAvailable.Dispose();
            _invokeRun.Dispose();
            _interruptRun.Dispose();

            Sync.Dispose();
        }
    }
}