InlineToMemoryClass.cs
using Ryujinx.Common;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
{
    /// <summary>
    /// Represents an Inline-to-Memory engine class.
    /// </summary>
    /// <remarks>
    /// A transfer is started with <see cref="LaunchDma(ref InlineToMemoryClassState, int)"/>, which latches the
    /// destination parameters, and is then fed with data through the LoadInlineData overloads.
    /// Once enough data for the whole transfer has been pushed, the data is written to the destination.
    /// </remarks>
    class InlineToMemoryClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;

        // Only created when the class owns its register state (initializeState == true), null otherwise.
        private readonly DeviceState<InlineToMemoryClassState> _state;

        // Destination layout, taken from bit 0 of the LaunchDma argument.
        private bool _isLinear;

        // Number of words already stored on the staging buffer for the current transfer.
        private int _offset;

        // Total size in bytes of the current transfer, with each line padded to a multiple of 4 bytes.
        private int _size;

        // Destination parameters, latched from the class state when the transfer is launched.
        private ulong _dstGpuVa;
        private int _dstX;
        private int _dstY;
        private int _dstWidth;
        private int _dstHeight;
        private int _dstStride;
        private int _dstGobBlocksInY;
        private int _dstGobBlocksInZ;
        private int _lineLengthIn;
        private int _lineCount;

        // Set once the data of the current transfer has been written, pushed data is ignored while set.
        private bool _finished;

        // Staging buffer for the pushed words. It is reused between transfers and only reallocated to grow.
        private int[] _buffer;

        /// <summary>
        /// Creates a new instance of the Inline-to-Memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
        {
            _context = context;
            _channel = channel;

            if (initializeState)
            {
                _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
                {
                    { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
                    { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
                });
            }
        }

        /// <summary>
        /// Creates a new instance of the inline-to-memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
        {
        }

        /// <summary>
        /// Reads data from the class registers.
        /// </summary>
        /// <remarks>
        /// Requires the internal state, which only exists if the instance was created with state initialization enabled.
        /// </remarks>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset) => _state.Read(offset);

        /// <summary>
        /// Writes data to the class registers.
        /// </summary>
        /// <remarks>
        /// Requires the internal state, which only exists if the instance was created with state initialization enabled.
        /// </remarks>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data) => _state.Write(offset, data);

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            LaunchDma(ref _state.State, argument);
        }

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <remarks>
        /// This only prepares the transfer. The destination is written after the data has been pushed
        /// with the LoadInlineData methods.
        /// </remarks>
        /// <param name="state">Current class state</param>
        /// <param name="argument">Method call argument</param>
        public void LaunchDma(ref InlineToMemoryClassState state, int argument)
        {
            _isLinear = (argument & 1) != 0;

            _offset = 0;

            // Data is pushed one word at a time, so each line occupies a multiple of 4 bytes on the buffer.
            _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);

            int count = _size / 4;

            // Keep the existing buffer if it is already large enough for this transfer.
            if (_buffer == null || _buffer.Length < count)
            {
                _buffer = new int[count];
            }

            ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;

            _dstGpuVa = dstGpuVa;
            _dstX = state.SetDstOriginBytesXV;
            _dstY = state.SetDstOriginSamplesYV;
            _dstWidth = (int)state.SetDstWidth;
            _dstHeight = (int)state.SetDstHeight;
            _dstStride = (int)state.PitchOut;
            _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
            _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
            _lineLengthIn = (int)state.LineLengthIn;
            _lineCount = (int)state.LineCount;

            _finished = false;
        }

        /// <summary>
        /// Pushes a block of data to the Inline-to-Memory engine.
        /// </summary>
        /// <remarks>
        /// Data pushed after the current transfer has finished is ignored.
        /// </remarks>
        /// <param name="data">Data to push</param>
        public void LoadInlineData(ReadOnlySpan<int> data)
        {
            if (!_finished)
            {
                // Never copy more words than what still fits on the staging buffer.
                int copySize = Math.Min(data.Length, _buffer.Length - _offset);
                data[..copySize].CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));

                _offset += copySize;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Pushes a word of data to the Inline-to-Memory engine.
        /// </summary>
        /// <remarks>
        /// Data pushed after the current transfer has finished is ignored.
        /// </remarks>
        /// <param name="argument">Method call argument</param>
        public void LoadInlineData(int argument)
        {
            if (!_finished)
            {
                _buffer[_offset++] = argument;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Performs actual copy of the inline data after the transfer is finished.
        /// </summary>
        private void FinishTransfer()
        {
            var memoryManager = _channel.MemoryManager;

            // The buffer might be larger than the transfer, only the first _size bytes belong to it.
            var data = MemoryMarshal.Cast<int, byte>(_buffer)[.._size];

            if (_isLinear && _lineCount == 1)
            {
                // Single line linear transfer, the data can be written with a single contiguous write.
                memoryManager.WriteTrackedResource(_dstGpuVa, data[.._lineLengthIn]);
                _context.AdvanceSequence();
            }
            else
            {
                // TODO: Verify if the destination X/Y and width/height are taken into account
                // for linear texture transfers. If not, we can use the fast path for that as well.
                // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
                if (!_isLinear)
                {
                    // Try to find a texture on the cache that matches the destination,
                    // if there is one, the data is set on the texture directly.
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        _dstGpuVa,
                        1,
                        _dstStride,
                        _dstHeight,
                        _lineLengthIn,
                        _lineCount,
                        _isLinear,
                        _dstGobBlocksInY,
                        _dstGobBlocksInZ);

                    if (target != null)
                    {
                        target.SynchronizeMemory();
                        var dataCopy = MemoryOwner<byte>.RentCopy(data);
                        target.SetData(dataCopy, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
                        target.SignalModified();

                        // The transfer is complete on this path too. Without this, any data pushed
                        // afterwards would be stored past the end of the transfer and trigger the copy again.
                        _finished = true;

                        return;
                    }
                }

                var dstCalculator = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int srcOffset = 0;

                for (int y = _dstY; y < _dstY + _lineCount; y++)
                {
                    // Each line is split in 3 parts: leading bytes up to the first 16 bytes aligned X,
                    // the 16 bytes aligned middle that is copied one vector at a time, and the trailing bytes.
                    int x1 = _dstX;
                    int x2 = _dstX + _lineLengthIn;
                    int x1Round = BitUtils.AlignUp(_dstX, 16);
                    int x2Trunc = BitUtils.AlignDown(x2, 16);

                    int x = x1;

                    // If the line ends before the first aligned X, skip this and let the last loop copy all bytes.
                    if (x1Round <= x2)
                    {
                        for (; x < x1Round; x++, srcOffset++)
                        {
                            int dstOffset = dstCalculator.GetOffset(x, y);

                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                            memoryManager.Write(dstAddress, data[srcOffset]);
                        }
                    }

                    for (; x < x2Trunc; x += 16, srcOffset += 16)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                    }

                    for (; x < x2; x++, srcOffset++)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, data[srcOffset]);
                    }

                    // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                    // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                    int misalignment = _lineLengthIn & 3;

                    if (misalignment != 0)
                    {
                        srcOffset += 4 - misalignment;
                    }
                }

                _context.AdvanceSequence();
            }

            _finished = true;
        }
    }
}