ComputeClass.cs
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Gpu.Engine.Compute
{
    /// <summary>
    /// Compute engine class. Exposes the compute register state and reacts to writes on the
    /// LaunchDma, LoadInlineData and SendSignalingPcasB registers.
    /// </summary>
    class ComputeClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly ThreedClass _3dEngine;
        private readonly DeviceState<ComputeClassState> _state;

        private readonly InlineToMemoryClass _i2mClass;

        /// <summary>
        /// Builds the compute engine class and registers its register write callbacks.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="threedEngine">3D engine</param>
        public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
        {
            _context = context;
            _channel = channel;
            _3dEngine = threedEngine;

            // Registers listed here invoke the paired handler when written; no read callbacks are installed.
            var writeCallbacks = new Dictionary<string, RwCallback>
            {
                [nameof(ComputeClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
                [nameof(ComputeClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null),
                [nameof(ComputeClassState.SendSignalingPcasB)] = new RwCallback(SendSignalingPcasB, null),
            };

            _state = new DeviceState<ComputeClassState>(writeCallbacks);

            _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
        }

        /// <summary>
        /// Reads a value from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset)
        {
            return _state.Read(offset);
        }

        /// <summary>
        /// Writes a value to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data)
        {
            _state.Write(offset, data);
        }

        /// <summary>
        /// Starts the Inline-to-Memory DMA copy, handing the compute register state to the
        /// Inline-to-Memory class reinterpreted as its own state layout.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument) =>
            _i2mClass.LaunchDma(ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State), argument);

        /// <summary>
        /// Forwards a block of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="data">Data to push</param>
        public void LoadInlineData(ReadOnlySpan<int> data) => _i2mClass.LoadInlineData(data);

        /// <summary>
        /// Forwards a single word of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LoadInlineData(int argument) => _i2mClass.LoadInlineData(argument);

        /// <summary>
        /// Runs a compute dispatch: reads the QMD referenced by the SendPcasA register, binds the
        /// uniform buffers, shader program, texture pools and storage buffers it requires, and then
        /// issues the dispatch on the host pipeline.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void SendSignalingPcasB(int argument)
        {
            var memory = _channel.MemoryManager;

            // State is about to be modified, so pending instanced draws have to be completed first.
            _3dEngine.PerformDeferredDraws();

            // Any uniform buffer data that is still pending must be written to memory as well.
            _3dEngine.FlushUboDirty();

            ref ComputeClassState regs = ref _state.State;

            // The register value is shifted left by 8 bits to form the address the QMD is read from.
            uint qmdAddressBits = regs.SendPcasA;

            var qmd = _channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddressBits << 8);

            // Program region base address plus the program offset taken from the QMD.
            ulong shaderGpuVa = (((ulong)regs.SetProgramRegionAAddressUpper << 32) | regs.SetProgramRegionB) + (uint)qmd.ProgramOffset;

            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;

            // Shared memory is limited to the maximum reported by the context capabilities.
            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);

            for (int slot = 0; slot < Constants.TotalCpUniformBuffers; slot++)
            {
                // Only slots that the QMD marks as valid are bound.
                if (qmd.ConstantBufferValid(slot))
                {
                    ulong addressLow = (uint)qmd.ConstantBufferAddrLower(slot);
                    ulong addressHigh = (ulong)qmd.ConstantBufferAddrUpper(slot) << 32;
                    ulong bufferSize = (ulong)qmd.ConstantBufferSize(slot);

                    _channel.BufferManager.SetComputeUniformBuffer(slot, addressLow | addressHigh, bufferSize);
                }
            }

            ulong samplerPoolGpuVa = ((ulong)regs.SetTexSamplerPoolAOffsetUpper << 32) | regs.SetTexSamplerPoolB;
            ulong texturePoolGpuVa = ((ulong)regs.SetTexHeaderPoolAOffsetUpper << 32) | regs.SetTexHeaderPoolB;

            int samplerPoolMaximumId = regs.SetTexSamplerPoolCMaximumIndex;

            var poolState = new GpuChannelPoolState(
                texturePoolGpuVa,
                regs.SetTexHeaderPoolCMaximumIndex,
                regs.SetBindlessTextureConstantBufferSlotSelect);

            GpuChannelComputeState computeState = BuildComputeState();

            CachedShaderProgram program = memory.Physical.ShaderCache.GetComputeShader(
                _channel,
                samplerPoolMaximumId,
                poolState,
                computeState,
                shaderGpuVa);

            _context.Renderer.Pipeline.SetProgram(program.HostProgram);

            _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, regs.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
            _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, regs.SetTexHeaderPoolCMaximumIndex);
            _channel.TextureManager.SetComputeTextureBufferIndex(regs.SetBindlessTextureConstantBufferSlotSelect);

            BindStorageBuffers(program.Shaders[0].Info);

            if (_channel.BufferManager.HasUnalignedStorageBuffers != computeState.HasUnalignedStorageBuffer)
            {
                // The storage buffer alignment assumption used for the first fetch no longer holds,
                // so the shader is fetched again with the updated state.
                computeState = BuildComputeState();

                program = memory.Physical.ShaderCache.GetComputeShader(
                    _channel,
                    samplerPoolMaximumId,
                    poolState,
                    computeState,
                    shaderGpuVa);

                _context.Renderer.Pipeline.SetProgram(program.HostProgram);
            }

            _channel.BufferManager.SetComputeBufferBindings(program.Bindings);

            _channel.TextureManager.SetComputeBindings(program.Bindings);

            // The shader was fetched above, so a specialization state mismatch (false result) is not expected here.
            _channel.TextureManager.CommitComputeBindings(program.SpecializationState);

            _channel.BufferManager.CommitComputeBindings();

            _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);

            _3dEngine.ForceShaderUpdate();

            // Snapshot of the compute state for shader lookup; the unaligned storage buffer flag
            // is sampled from the buffer manager at the time of the call.
            GpuChannelComputeState BuildComputeState() => new(
                qmd.CtaThreadDimension0,
                qmd.CtaThreadDimension1,
                qmd.CtaThreadDimension2,
                localMemorySize,
                sharedMemorySize,
                _channel.BufferManager.HasUnalignedStorageBuffers);
        }

        /// <summary>
        /// Binds every storage buffer used by the compute shader, reading each buffer descriptor
        /// from the compute uniform buffer slot and word offset recorded in the shader info.
        /// </summary>
        /// <param name="info">Shader program info listing the storage buffers in use</param>
        private void BindStorageBuffers(ShaderProgramInfo info)
        {
            for (int i = 0; i < info.SBuffers.Count; i++)
            {
                BufferDescriptor buffer = info.SBuffers[i];

                // The descriptor offset is multiplied by 4 to turn it into a byte offset.
                ulong descriptorAddress = _channel.BufferManager.GetComputeUniformBufferAddress(buffer.SbCbSlot);
                descriptorAddress += (ulong)buffer.SbCbOffset * 4;

                SbDescriptor descriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

                // The descriptor size is only trusted for the driver reserved uniform buffer slot;
                // otherwise the mapped size (limited by MaxUnknownStorageSize) is used.
                // TODO: Use full mapped size and somehow speed up buffer sync.
                uint size = buffer.SbCbSlot == Constants.DriverReservedUniformBuffer
                    ? (uint)descriptor.Size
                    : (uint)_channel.MemoryManager.GetMappedSize(descriptor.PackAddress(), Constants.MaxUnknownStorageSize);

                _channel.BufferManager.SetComputeStorageBuffer(buffer.Slot, descriptor.PackAddress(), size, buffer.Flags);
            }
        }
    }
}