/ src / Ryujinx.Graphics.GAL / Multithreading / ThreadedRenderer.cs
ThreadedRenderer.cs
  1  using Ryujinx.Common;
  2  using Ryujinx.Common.Configuration;
  3  using Ryujinx.Graphics.GAL.Multithreading.Commands;
  4  using Ryujinx.Graphics.GAL.Multithreading.Commands.Buffer;
  5  using Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer;
  6  using Ryujinx.Graphics.GAL.Multithreading.Model;
  7  using Ryujinx.Graphics.GAL.Multithreading.Resources;
  8  using Ryujinx.Graphics.GAL.Multithreading.Resources.Programs;
  9  using System;
 10  using System.Diagnostics;
 11  using System.Runtime.CompilerServices;
 12  using System.Runtime.InteropServices;
 13  using System.Threading;
 14  
 15  namespace Ryujinx.Graphics.GAL.Multithreading
 16  {
 17      /// <summary>
 18      /// The ThreadedRenderer is a layer that can be put in front of any Renderer backend to make
 19      /// its processing happen on a separate thread, rather than intertwined with the GPU emulation.
 20      /// A new thread is created to handle the GPU command processing, separate from the renderer thread.
 21      /// Calls to the renderer, pipeline and resources are queued to happen on the renderer thread.
 22      /// </summary>
 23      public class ThreadedRenderer : IRenderer
 24      {
 25          private const int SpanPoolBytes = 4 * 1024 * 1024;
 26          private const int MaxRefsPerCommand = 2;
 27          private const int QueueCount = 10000;
 28  
 29          private readonly int _elementSize;
 30          private readonly IRenderer _baseRenderer;
 31          private Thread _gpuThread;
 32          private Thread _backendThread;
 33          private bool _running;
 34  
 35          private readonly AutoResetEvent _frameComplete = new(true);
 36  
 37          private readonly ManualResetEventSlim _galWorkAvailable;
 38          private readonly CircularSpanPool _spanPool;
 39  
 40          private readonly ManualResetEventSlim _invokeRun;
 41          private readonly AutoResetEvent _interruptRun;
 42  
 43          private bool _lastSampleCounterClear = true;
 44  
 45          private readonly byte[] _commandQueue;
 46          private readonly object[] _refQueue;
 47  
 48          private int _consumerPtr;
 49          private int _commandCount;
 50  
 51          private int _producerPtr;
 52          private int _lastProducedPtr;
 53          private int _invokePtr;
 54  
 55          private int _refProducerPtr;
 56          private int _refConsumerPtr;
 57  
 58          private Action _interruptAction;
 59          private readonly object _interruptLock = new();
 60  
 61          public event EventHandler<ScreenCaptureImageInfo> ScreenCaptured;
 62  
 63          internal BufferMap Buffers { get; }
 64          internal SyncMap Sync { get; }
 65          internal CircularSpanPool SpanPool { get; }
 66          internal ProgramQueue Programs { get; }
 67  
 68          public IPipeline Pipeline { get; }
 69          public IWindow Window { get; }
 70  
 71          public IRenderer BaseRenderer => _baseRenderer;
 72  
 73          public bool PreferThreading => _baseRenderer.PreferThreading;
 74  
 75          public ThreadedRenderer(IRenderer renderer)
 76          {
 77              _baseRenderer = renderer;
 78  
 79              renderer.ScreenCaptured += (sender, info) => ScreenCaptured?.Invoke(this, info);
 80              renderer.SetInterruptAction(Interrupt);
 81  
 82              Pipeline = new ThreadedPipeline(this);
 83              Window = new ThreadedWindow(this, renderer);
 84              Buffers = new BufferMap();
 85              Sync = new SyncMap();
 86              Programs = new ProgramQueue(renderer);
 87  
 88              _galWorkAvailable = new ManualResetEventSlim(false);
 89              _invokeRun = new ManualResetEventSlim();
 90              _interruptRun = new AutoResetEvent(false);
 91              _spanPool = new CircularSpanPool(this, SpanPoolBytes);
 92              SpanPool = _spanPool;
 93  
 94              _elementSize = BitUtils.AlignUp(CommandHelper.GetMaxCommandSize(), 4);
 95  
 96              _commandQueue = new byte[_elementSize * QueueCount];
 97              _refQueue = new object[MaxRefsPerCommand * QueueCount];
 98          }
 99  
100          public void RunLoop(ThreadStart gpuLoop)
101          {
102              _running = true;
103  
104              _backendThread = Thread.CurrentThread;
105  
106              _gpuThread = new Thread(gpuLoop)
107              {
108                  Name = "GPU.MainThread",
109              };
110  
111              _gpuThread.Start();
112  
113              RenderLoop();
114          }
115  
116          public void RenderLoop()
117          {
118              // Power through the render queue until the Gpu thread work is done.
119  
120              while (_running)
121              {
122                  _galWorkAvailable.Wait();
123                  _galWorkAvailable.Reset();
124  
125                  if (Volatile.Read(ref _interruptAction) != null)
126                  {
127                      _interruptAction();
128                      _interruptRun.Set();
129  
130                      Interlocked.Exchange(ref _interruptAction, null);
131                  }
132  
133                  // The other thread can only increase the command count.
134                  // We can assume that if it is above 0, it will stay there or get higher.
135  
136                  while (Volatile.Read(ref _commandCount) > 0 && Volatile.Read(ref _interruptAction) == null)
137                  {
138                      int commandPtr = _consumerPtr;
139  
140                      Span<byte> command = new(_commandQueue, commandPtr * _elementSize, _elementSize);
141  
142                      // Run the command.
143  
144                      CommandHelper.RunCommand(command, this, _baseRenderer);
145  
146                      if (Interlocked.CompareExchange(ref _invokePtr, -1, commandPtr) == commandPtr)
147                      {
148                          _invokeRun.Set();
149                      }
150  
151                      _consumerPtr = (_consumerPtr + 1) % QueueCount;
152  
153                      Interlocked.Decrement(ref _commandCount);
154                  }
155              }
156          }
157  
158          internal SpanRef<T> CopySpan<T>(ReadOnlySpan<T> data) where T : unmanaged
159          {
160              return _spanPool.Insert(data);
161          }
162  
163          private TableRef<T> Ref<T>(T reference)
164          {
165              return new TableRef<T>(this, reference);
166          }
167  
168          internal ref T New<T>() where T : struct
169          {
170              while (_producerPtr == (Volatile.Read(ref _consumerPtr) + QueueCount - 1) % QueueCount)
171              {
172                  // If incrementing the producer pointer would overflow, we need to wait.
173                  // _consumerPtr can only move forward, so there's no race to worry about here.
174  
175                  Thread.Sleep(1);
176              }
177  
178              int taken = _producerPtr;
179              _lastProducedPtr = taken;
180  
181              _producerPtr = (_producerPtr + 1) % QueueCount;
182  
183              Span<byte> memory = new(_commandQueue, taken * _elementSize, _elementSize);
184              ref T result = ref Unsafe.As<byte, T>(ref MemoryMarshal.GetReference(memory));
185  
186              memory[^1] = (byte)((IGALCommand)result).CommandType;
187  
188              return ref result;
189          }
190  
191          internal int AddTableRef(object obj)
192          {
193              // The reference table is sized so that it will never overflow, so long as the references are taken after the command is allocated.
194  
195              int index = _refProducerPtr;
196  
197              _refQueue[index] = obj;
198  
199              _refProducerPtr = (_refProducerPtr + 1) % _refQueue.Length;
200  
201              return index;
202          }
203  
204          internal object RemoveTableRef(int index)
205          {
206              Debug.Assert(index == _refConsumerPtr);
207  
208              object result = _refQueue[_refConsumerPtr];
209              _refQueue[_refConsumerPtr] = null;
210  
211              _refConsumerPtr = (_refConsumerPtr + 1) % _refQueue.Length;
212  
213              return result;
214          }
215  
216          internal void QueueCommand()
217          {
218              int result = Interlocked.Increment(ref _commandCount);
219  
220              if (result == 1)
221              {
222                  _galWorkAvailable.Set();
223              }
224          }
225  
226          internal void InvokeCommand()
227          {
228              _invokeRun.Reset();
229              _invokePtr = _lastProducedPtr;
230  
231              QueueCommand();
232  
233              // Wait for the command to complete.
234              _invokeRun.Wait();
235          }
236  
237          internal void WaitForFrame()
238          {
239              _frameComplete.WaitOne();
240          }
241  
242          internal void SignalFrame()
243          {
244              _frameComplete.Set();
245          }
246  
247          internal bool IsGpuThread()
248          {
249              return Thread.CurrentThread == _gpuThread;
250          }
251  
252          public void BackgroundContextAction(Action action, bool alwaysBackground = false)
253          {
254              if (IsGpuThread() && !alwaysBackground)
255              {
256                  // The action must be performed on the render thread.
257                  New<ActionCommand>().Set(Ref(action));
258                  InvokeCommand();
259              }
260              else
261              {
262                  _baseRenderer.BackgroundContextAction(action, true);
263              }
264          }
265  
266          public BufferHandle CreateBuffer(int size, BufferAccess access)
267          {
268              BufferHandle handle = Buffers.CreateBufferHandle();
269              New<CreateBufferAccessCommand>().Set(handle, size, access);
270              QueueCommand();
271  
272              return handle;
273          }
274  
275          public BufferHandle CreateBuffer(nint pointer, int size)
276          {
277              BufferHandle handle = Buffers.CreateBufferHandle();
278              New<CreateHostBufferCommand>().Set(handle, pointer, size);
279              QueueCommand();
280  
281              return handle;
282          }
283  
284          public BufferHandle CreateBufferSparse(ReadOnlySpan<BufferRange> storageBuffers)
285          {
286              BufferHandle handle = Buffers.CreateBufferHandle();
287              New<CreateBufferSparseCommand>().Set(handle, CopySpan(storageBuffers));
288              QueueCommand();
289  
290              return handle;
291          }
292  
293          public IImageArray CreateImageArray(int size, bool isBuffer)
294          {
295              var imageArray = new ThreadedImageArray(this);
296              New<CreateImageArrayCommand>().Set(Ref(imageArray), size, isBuffer);
297              QueueCommand();
298  
299              return imageArray;
300          }
301  
302          public IProgram CreateProgram(ShaderSource[] shaders, ShaderInfo info)
303          {
304              var program = new ThreadedProgram(this);
305  
306              SourceProgramRequest request = new(program, shaders, info);
307  
308              Programs.Add(request);
309  
310              New<CreateProgramCommand>().Set(Ref((IProgramRequest)request));
311              QueueCommand();
312  
313              return program;
314          }
315  
316          public ISampler CreateSampler(SamplerCreateInfo info)
317          {
318              var sampler = new ThreadedSampler(this);
319              New<CreateSamplerCommand>().Set(Ref(sampler), info);
320              QueueCommand();
321  
322              return sampler;
323          }
324  
325          public void CreateSync(ulong id, bool strict)
326          {
327              Sync.CreateSyncHandle(id);
328              New<CreateSyncCommand>().Set(id, strict);
329              QueueCommand();
330          }
331  
332          public ITexture CreateTexture(TextureCreateInfo info)
333          {
334              if (IsGpuThread())
335              {
336                  var texture = new ThreadedTexture(this, info);
337                  New<CreateTextureCommand>().Set(Ref(texture), info);
338                  QueueCommand();
339  
340                  return texture;
341              }
342              else
343              {
344                  var texture = new ThreadedTexture(this, info)
345                  {
346                      Base = _baseRenderer.CreateTexture(info),
347                  };
348  
349                  return texture;
350              }
351          }
352          public ITextureArray CreateTextureArray(int size, bool isBuffer)
353          {
354              var textureArray = new ThreadedTextureArray(this);
355              New<CreateTextureArrayCommand>().Set(Ref(textureArray), size, isBuffer);
356              QueueCommand();
357  
358              return textureArray;
359          }
360  
361          public void DeleteBuffer(BufferHandle buffer)
362          {
363              New<BufferDisposeCommand>().Set(buffer);
364              QueueCommand();
365          }
366  
367          public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
368          {
369              if (IsGpuThread())
370              {
371                  ResultBox<PinnedSpan<byte>> box = new();
372                  New<BufferGetDataCommand>().Set(buffer, offset, size, Ref(box));
373                  InvokeCommand();
374  
375                  return box.Result;
376              }
377              else
378              {
379                  return _baseRenderer.GetBufferData(Buffers.MapBufferBlocking(buffer), offset, size);
380              }
381          }
382  
383          public Capabilities GetCapabilities()
384          {
385              ResultBox<Capabilities> box = new();
386              New<GetCapabilitiesCommand>().Set(Ref(box));
387              InvokeCommand();
388  
389              return box.Result;
390          }
391  
392          public ulong GetCurrentSync()
393          {
394              return _baseRenderer.GetCurrentSync();
395          }
396  
397          public HardwareInfo GetHardwareInfo()
398          {
399              return _baseRenderer.GetHardwareInfo();
400          }
401  
402          /// <summary>
403          /// Initialize the base renderer. Must be called on the render thread.
404          /// </summary>
405          /// <param name="logLevel">Log level to use</param>
406          public void Initialize(GraphicsDebugLevel logLevel)
407          {
408              _baseRenderer.Initialize(logLevel);
409          }
410  
411          public IProgram LoadProgramBinary(byte[] programBinary, bool hasFragmentShader, ShaderInfo info)
412          {
413              var program = new ThreadedProgram(this);
414  
415              BinaryProgramRequest request = new(program, programBinary, hasFragmentShader, info);
416              Programs.Add(request);
417  
418              New<CreateProgramCommand>().Set(Ref((IProgramRequest)request));
419              QueueCommand();
420  
421              return program;
422          }
423  
424          public void PreFrame()
425          {
426              New<PreFrameCommand>();
427              QueueCommand();
428          }
429  
430          public ICounterEvent ReportCounter(CounterType type, EventHandler<ulong> resultHandler, float divisor, bool hostReserved)
431          {
432              ThreadedCounterEvent evt = new(this, type, _lastSampleCounterClear);
433              New<ReportCounterCommand>().Set(Ref(evt), type, Ref(resultHandler), divisor, hostReserved);
434              QueueCommand();
435  
436              if (type == CounterType.SamplesPassed)
437              {
438                  _lastSampleCounterClear = false;
439              }
440  
441              return evt;
442          }
443  
444          public void ResetCounter(CounterType type)
445          {
446              New<ResetCounterCommand>().Set(type);
447              QueueCommand();
448              _lastSampleCounterClear = true;
449          }
450  
451          public void Screenshot()
452          {
453              _baseRenderer.Screenshot();
454          }
455  
456          public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
457          {
458              New<BufferSetDataCommand>().Set(buffer, offset, CopySpan(data));
459              QueueCommand();
460          }
461  
462          public void UpdateCounters()
463          {
464              New<UpdateCountersCommand>();
465              QueueCommand();
466          }
467  
468          public void WaitSync(ulong id)
469          {
470              Sync.WaitSyncAvailability(id);
471  
472              _baseRenderer.WaitSync(id);
473          }
474  
475          private void Interrupt(Action action)
476          {
477              // Interrupt the backend thread from any external thread and invoke the given action.
478  
479              if (Thread.CurrentThread == _backendThread)
480              {
481                  // If this is called from the backend thread, the action can run immediately.
482                  action();
483              }
484              else
485              {
486                  lock (_interruptLock)
487                  {
488                      while (Interlocked.CompareExchange(ref _interruptAction, action, null) != null)
489                      {
490                      }
491  
492                      _galWorkAvailable.Set();
493  
494                      _interruptRun.WaitOne();
495                  }
496              }
497          }
498  
499          public void SetInterruptAction(Action<Action> interruptAction)
500          {
501              // Threaded renderer ignores given interrupt action, as it provides its own to the child renderer.
502          }
503  
504          public bool PrepareHostMapping(nint address, ulong size)
505          {
506              return _baseRenderer.PrepareHostMapping(address, size);
507          }
508  
509          public void FlushThreadedCommands()
510          {
511              SpinWait wait = new();
512  
513              while (Volatile.Read(ref _commandCount) > 0)
514              {
515                  wait.SpinOnce();
516              }
517          }
518  
519          public void Dispose()
520          {
521              GC.SuppressFinalize(this);
522  
523              // Dispose must happen from the render thread, after all commands have completed.
524  
525              // Stop the GPU thread.
526              _running = false;
527              _galWorkAvailable.Set();
528  
529              if (_gpuThread != null && _gpuThread.IsAlive)
530              {
531                  _gpuThread.Join();
532              }
533  
534              // Dispose the renderer.
535              _baseRenderer.Dispose();
536  
537              // Dispose events.
538              _frameComplete.Dispose();
539              _galWorkAvailable.Dispose();
540              _invokeRun.Dispose();
541              _interruptRun.Dispose();
542  
543              Sync.Dispose();
544          }
545      }
546  }