/ src / Ryujinx.Graphics.Gpu / Engine / InlineToMemory / InlineToMemoryClass.cs
InlineToMemoryClass.cs
  1  using Ryujinx.Common;
  2  using Ryujinx.Common.Memory;
  3  using Ryujinx.Graphics.Device;
  4  using Ryujinx.Graphics.Texture;
  5  using System;
  6  using System.Collections.Generic;
  7  using System.Runtime.InteropServices;
  8  using System.Runtime.Intrinsics;
  9  
 10  namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
 11  {
 12      /// <summary>
 13      /// Represents a Inline-to-Memory engine class.
 14      /// </summary>
 15      class InlineToMemoryClass : IDeviceState
 16      {
 17          private readonly GpuContext _context;
 18          private readonly GpuChannel _channel;
 19          private readonly DeviceState<InlineToMemoryClassState> _state;
 20  
 21          private bool _isLinear;
 22  
 23          private int _offset;
 24          private int _size;
 25  
 26          private ulong _dstGpuVa;
 27          private int _dstX;
 28          private int _dstY;
 29          private int _dstWidth;
 30          private int _dstHeight;
 31          private int _dstStride;
 32          private int _dstGobBlocksInY;
 33          private int _dstGobBlocksInZ;
 34          private int _lineLengthIn;
 35          private int _lineCount;
 36  
 37          private bool _finished;
 38  
 39          private int[] _buffer;
 40  
 41          /// <summary>
 42          /// Creates a new instance of the Inline-to-Memory engine class.
 43          /// </summary>
 44          /// <param name="context">GPU context</param>
 45          /// <param name="channel">GPU channel</param>
 46          /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
 47          public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
 48          {
 49              _context = context;
 50              _channel = channel;
 51  
 52              if (initializeState)
 53              {
 54                  _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
 55                  {
 56                      { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
 57                      { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) },
 58                  });
 59              }
 60          }
 61  
 62          /// <summary>
 63          /// Creates a new instance of the inline-to-memory engine class.
 64          /// </summary>
 65          /// <param name="context">GPU context</param>
 66          /// <param name="channel">GPU channel</param>
 67          public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
 68          {
 69          }
 70  
 71          /// <summary>
 72          /// Reads data from the class registers.
 73          /// </summary>
 74          /// <param name="offset">Register byte offset</param>
 75          /// <returns>Data at the specified offset</returns>
 76          public int Read(int offset) => _state.Read(offset);
 77  
 78          /// <summary>
 79          /// Writes data to the class registers.
 80          /// </summary>
 81          /// <param name="offset">Register byte offset</param>
 82          /// <param name="data">Data to be written</param>
 83          public void Write(int offset, int data) => _state.Write(offset, data);
 84  
 85          /// <summary>
 86          /// Launches Inline-to-Memory engine DMA copy.
 87          /// </summary>
 88          /// <param name="argument">Method call argument</param>
 89          private void LaunchDma(int argument)
 90          {
 91              LaunchDma(ref _state.State, argument);
 92          }
 93  
 94          /// <summary>
 95          /// Launches Inline-to-Memory engine DMA copy.
 96          /// </summary>
 97          /// <param name="state">Current class state</param>
 98          /// <param name="argument">Method call argument</param>
 99          public void LaunchDma(ref InlineToMemoryClassState state, int argument)
100          {
101              _isLinear = (argument & 1) != 0;
102  
103              _offset = 0;
104              _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);
105  
106              int count = _size / 4;
107  
108              if (_buffer == null || _buffer.Length < count)
109              {
110                  _buffer = new int[count];
111              }
112  
113              ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;
114  
115              _dstGpuVa = dstGpuVa;
116              _dstX = state.SetDstOriginBytesXV;
117              _dstY = state.SetDstOriginSamplesYV;
118              _dstWidth = (int)state.SetDstWidth;
119              _dstHeight = (int)state.SetDstHeight;
120              _dstStride = (int)state.PitchOut;
121              _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
122              _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
123              _lineLengthIn = (int)state.LineLengthIn;
124              _lineCount = (int)state.LineCount;
125  
126              _finished = false;
127          }
128  
129          /// <summary>
130          /// Pushes a block of data to the Inline-to-Memory engine.
131          /// </summary>
132          /// <param name="data">Data to push</param>
133          public void LoadInlineData(ReadOnlySpan<int> data)
134          {
135              if (!_finished)
136              {
137                  int copySize = Math.Min(data.Length, _buffer.Length - _offset);
138                  data[..copySize].CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));
139  
140                  _offset += copySize;
141  
142                  if (_offset * 4 >= _size)
143                  {
144                      FinishTransfer();
145                  }
146              }
147          }
148  
149          /// <summary>
150          /// Pushes a word of data to the Inline-to-Memory engine.
151          /// </summary>
152          /// <param name="argument">Method call argument</param>
153          public void LoadInlineData(int argument)
154          {
155              if (!_finished)
156              {
157                  _buffer[_offset++] = argument;
158  
159                  if (_offset * 4 >= _size)
160                  {
161                      FinishTransfer();
162                  }
163              }
164          }
165  
166          /// <summary>
167          /// Performs actual copy of the inline data after the transfer is finished.
168          /// </summary>
169          private void FinishTransfer()
170          {
171              var memoryManager = _channel.MemoryManager;
172  
173              var data = MemoryMarshal.Cast<int, byte>(_buffer)[.._size];
174  
175              if (_isLinear && _lineCount == 1)
176              {
177                  memoryManager.WriteTrackedResource(_dstGpuVa, data[.._lineLengthIn]);
178                  _context.AdvanceSequence();
179              }
180              else
181              {
182                  // TODO: Verify if the destination X/Y and width/height are taken into account
183                  // for linear texture transfers. If not, we can use the fast path for that aswell.
184                  // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
185                  if (!_isLinear)
186                  {
187                      var target = memoryManager.Physical.TextureCache.FindTexture(
188                          memoryManager,
189                          _dstGpuVa,
190                          1,
191                          _dstStride,
192                          _dstHeight,
193                          _lineLengthIn,
194                          _lineCount,
195                          _isLinear,
196                          _dstGobBlocksInY,
197                          _dstGobBlocksInZ);
198  
199                      if (target != null)
200                      {
201                          target.SynchronizeMemory();
202                          var dataCopy = MemoryOwner<byte>.RentCopy(data);
203                          target.SetData(dataCopy, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
204                          target.SignalModified();
205  
206                          return;
207                      }
208                  }
209  
210                  var dstCalculator = new OffsetCalculator(
211                      _dstWidth,
212                      _dstHeight,
213                      _dstStride,
214                      _isLinear,
215                      _dstGobBlocksInY,
216                      1);
217  
218                  int srcOffset = 0;
219  
220                  for (int y = _dstY; y < _dstY + _lineCount; y++)
221                  {
222                      int x1 = _dstX;
223                      int x2 = _dstX + _lineLengthIn;
224                      int x1Round = BitUtils.AlignUp(_dstX, 16);
225                      int x2Trunc = BitUtils.AlignDown(x2, 16);
226  
227                      int x = x1;
228  
229                      if (x1Round <= x2)
230                      {
231                          for (; x < x1Round; x++, srcOffset++)
232                          {
233                              int dstOffset = dstCalculator.GetOffset(x, y);
234  
235                              ulong dstAddress = _dstGpuVa + (uint)dstOffset;
236  
237                              memoryManager.Write(dstAddress, data[srcOffset]);
238                          }
239                      }
240  
241                      for (; x < x2Trunc; x += 16, srcOffset += 16)
242                      {
243                          int dstOffset = dstCalculator.GetOffset(x, y);
244  
245                          ulong dstAddress = _dstGpuVa + (uint)dstOffset;
246  
247                          memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
248                      }
249  
250                      for (; x < x2; x++, srcOffset++)
251                      {
252                          int dstOffset = dstCalculator.GetOffset(x, y);
253  
254                          ulong dstAddress = _dstGpuVa + (uint)dstOffset;
255  
256                          memoryManager.Write(dstAddress, data[srcOffset]);
257                      }
258  
259                      // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
260                      // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
261                      int misalignment = _lineLengthIn & 3;
262  
263                      if (misalignment != 0)
264                      {
265                          srcOffset += 4 - misalignment;
266                      }
267                  }
268  
269                  _context.AdvanceSequence();
270              }
271  
272              _finished = true;
273          }
274      }
275  }