/ src / Ryujinx.Graphics.Gpu / Engine / Compute / ComputeClass.cs
ComputeClass.cs
  1  using Ryujinx.Graphics.Device;
  2  using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
  3  using Ryujinx.Graphics.Gpu.Engine.Threed;
  4  using Ryujinx.Graphics.Gpu.Engine.Types;
  5  using Ryujinx.Graphics.Gpu.Shader;
  6  using Ryujinx.Graphics.Shader;
  7  using System;
  8  using System.Collections.Generic;
  9  using System.Runtime.CompilerServices;
 10  
 11  namespace Ryujinx.Graphics.Gpu.Engine.Compute
 12  {
 13      /// <summary>
 14      /// Represents a compute engine class.
 15      /// </summary>
 16      class ComputeClass : IDeviceState
 17      {
 18          private readonly GpuContext _context;
 19          private readonly GpuChannel _channel;
 20          private readonly ThreedClass _3dEngine;
 21          private readonly DeviceState<ComputeClassState> _state;
 22  
 23          private readonly InlineToMemoryClass _i2mClass;
 24  
 25          /// <summary>
 26          /// Creates a new instance of the compute engine class.
 27          /// </summary>
 28          /// <param name="context">GPU context</param>
 29          /// <param name="channel">GPU channel</param>
 30          /// <param name="threedEngine">3D engine</param>
 31          public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
 32          {
 33              _context = context;
 34              _channel = channel;
 35              _3dEngine = threedEngine;
 36              _state = new DeviceState<ComputeClassState>(new Dictionary<string, RwCallback>
 37              {
 38                  { nameof(ComputeClassState.LaunchDma), new RwCallback(LaunchDma, null) },
 39                  { nameof(ComputeClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
 40                  { nameof(ComputeClassState.SendSignalingPcasB), new RwCallback(SendSignalingPcasB, null) },
 41              });
 42  
 43              _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
 44          }
 45  
 46          /// <summary>
 47          /// Reads data from the class registers.
 48          /// </summary>
 49          /// <param name="offset">Register byte offset</param>
 50          /// <returns>Data at the specified offset</returns>
 51          public int Read(int offset) => _state.Read(offset);
 52  
 53          /// <summary>
 54          /// Writes data to the class registers.
 55          /// </summary>
 56          /// <param name="offset">Register byte offset</param>
 57          /// <param name="data">Data to be written</param>
 58          public void Write(int offset, int data) => _state.Write(offset, data);
 59  
 60          /// <summary>
 61          /// Launches the Inline-to-Memory DMA copy operation.
 62          /// </summary>
 63          /// <param name="argument">Method call argument</param>
 64          private void LaunchDma(int argument)
 65          {
 66              _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);
 67          }
 68  
 69          /// <summary>
 70          /// Pushes a block of data to the Inline-to-Memory engine.
 71          /// </summary>
 72          /// <param name="data">Data to push</param>
 73          public void LoadInlineData(ReadOnlySpan<int> data)
 74          {
 75              _i2mClass.LoadInlineData(data);
 76          }
 77  
 78          /// <summary>
 79          /// Pushes a word of data to the Inline-to-Memory engine.
 80          /// </summary>
 81          /// <param name="argument">Method call argument</param>
 82          private void LoadInlineData(int argument)
 83          {
 84              _i2mClass.LoadInlineData(argument);
 85          }
 86  
 87          /// <summary>
 88          /// Performs the compute dispatch operation.
 89          /// </summary>
 90          /// <param name="argument">Method call argument</param>
 91          private void SendSignalingPcasB(int argument)
 92          {
 93              var memoryManager = _channel.MemoryManager;
 94  
 95              // Since we're going to change the state, make sure any pending instanced draws are done.
 96              _3dEngine.PerformDeferredDraws();
 97  
 98              // Make sure all pending uniform buffer data is written to memory.
 99              _3dEngine.FlushUboDirty();
100  
101              uint qmdAddress = _state.State.SendPcasA;
102  
103              var qmd = _channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);
104  
105              ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
106  
107              shaderGpuVa += (uint)qmd.ProgramOffset;
108  
109              int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
110  
111              int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
112  
113              for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
114              {
115                  if (!qmd.ConstantBufferValid(index))
116                  {
117                      continue;
118                  }
119  
120                  ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
121                  ulong size = (ulong)qmd.ConstantBufferSize(index);
122  
123                  _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
124              }
125  
126              ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
127              ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;
128  
129              int samplerPoolMaximumId = _state.State.SetTexSamplerPoolCMaximumIndex;
130  
131              GpuChannelPoolState poolState = new(
132                  texturePoolGpuVa,
133                  _state.State.SetTexHeaderPoolCMaximumIndex,
134                  _state.State.SetBindlessTextureConstantBufferSlotSelect);
135  
136              GpuChannelComputeState computeState = new(
137                  qmd.CtaThreadDimension0,
138                  qmd.CtaThreadDimension1,
139                  qmd.CtaThreadDimension2,
140                  localMemorySize,
141                  sharedMemorySize,
142                  _channel.BufferManager.HasUnalignedStorageBuffers);
143  
144              CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, samplerPoolMaximumId, poolState, computeState, shaderGpuVa);
145  
146              _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
147  
148              _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
149              _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
150              _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);
151  
152              ShaderProgramInfo info = cs.Shaders[0].Info;
153  
154              for (int index = 0; index < info.SBuffers.Count; index++)
155              {
156                  BufferDescriptor sb = info.SBuffers[index];
157  
158                  ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(sb.SbCbSlot);
159                  sbDescAddress += (ulong)sb.SbCbOffset * 4;
160  
161                  SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
162  
163                  uint size;
164                  if (sb.SbCbSlot == Constants.DriverReservedUniformBuffer)
165                  {
166                      // Only trust the SbDescriptor size if it comes from slot 0.
167                      size = (uint)sbDescriptor.Size;
168                  }
169                  else
170                  {
171                      // TODO: Use full mapped size and somehow speed up buffer sync.
172                      size = (uint)_channel.MemoryManager.GetMappedSize(sbDescriptor.PackAddress(), Constants.MaxUnknownStorageSize);
173                  }
174  
175                  _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), size, sb.Flags);
176              }
177  
178              if (_channel.BufferManager.HasUnalignedStorageBuffers != computeState.HasUnalignedStorageBuffer)
179              {
180                  // Refetch the shader, as assumptions about storage buffer alignment have changed.
181                  computeState = new GpuChannelComputeState(
182                      qmd.CtaThreadDimension0,
183                      qmd.CtaThreadDimension1,
184                      qmd.CtaThreadDimension2,
185                      localMemorySize,
186                      sharedMemorySize,
187                      _channel.BufferManager.HasUnalignedStorageBuffers);
188  
189                  cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, samplerPoolMaximumId, poolState, computeState, shaderGpuVa);
190  
191                  _context.Renderer.Pipeline.SetProgram(cs.HostProgram);
192              }
193  
194              _channel.BufferManager.SetComputeBufferBindings(cs.Bindings);
195  
196              _channel.TextureManager.SetComputeBindings(cs.Bindings);
197  
198              // Should never return false for mismatching spec state, since the shader was fetched above.
199              _channel.TextureManager.CommitComputeBindings(cs.SpecializationState);
200  
201              _channel.BufferManager.CommitComputeBindings();
202  
203              _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);
204  
205              _3dEngine.ForceShaderUpdate();
206          }
207      }
208  }