/ src / Ryujinx.Graphics.Gpu / Engine / Dma / DmaClass.cs
DmaClass.cs
  1  using Ryujinx.Common;
  2  using Ryujinx.Common.Memory;
  3  using Ryujinx.Graphics.Device;
  4  using Ryujinx.Graphics.Gpu.Engine.Threed;
  5  using Ryujinx.Graphics.Gpu.Memory;
  6  using Ryujinx.Graphics.Texture;
  7  using System;
  8  using System.Collections.Generic;
  9  using System.Runtime.CompilerServices;
 10  using System.Runtime.InteropServices;
 11  using System.Runtime.Intrinsics;
 12  
 13  namespace Ryujinx.Graphics.Gpu.Engine.Dma
 14  {
 15      /// <summary>
 16      /// Represents a DMA copy engine class.
 17      /// </summary>
 18      class DmaClass : IDeviceState
 19      {
        // GPU context; this class uses it to read the timestamp stored by four-word semaphore releases.
        private readonly GpuContext _context;
        // GPU channel that provides the memory manager and texture manager used by the copies.
        private readonly GpuChannel _channel;
        // 3D engine; pending syncs and dirty uniform buffer data are flushed through it before a copy starts.
        private readonly ThreedClass _3dEngine;
        // Register state of this class, with a write callback on LaunchDma that triggers the copy.
        private readonly DeviceState<DmaClassState> _state;
 24  
 25          /// <summary>
 26          /// Copy flags passed on DMA launch.
 27          /// </summary>
 28          [Flags]
 29          private enum CopyFlags
 30          {
 31              SrcLinear = 1 << 7,
 32              DstLinear = 1 << 8,
 33              MultiLineEnable = 1 << 9,
 34              RemapEnable = 1 << 10,
 35          }
 36  
 37          /// <summary>
 38          /// Texture parameters for copy.
 39          /// </summary>
 40          private readonly struct TextureParams
 41          {
 42              /// <summary>
 43              /// Copy region X coordinate.
 44              /// </summary>
 45              public readonly int RegionX;
 46  
 47              /// <summary>
 48              /// Copy region Y coordinate.
 49              /// </summary>
 50              public readonly int RegionY;
 51  
 52              /// <summary>
 53              /// Offset from the base pointer of the data in memory.
 54              /// </summary>
 55              public readonly int BaseOffset;
 56  
 57              /// <summary>
 58              /// Bytes per pixel.
 59              /// </summary>
 60              public readonly int Bpp;
 61  
 62              /// <summary>
 63              /// Whether the texture is linear. If false, the texture is block linear.
 64              /// </summary>
 65              public readonly bool Linear;
 66  
 67              /// <summary>
 68              /// Pixel offset from XYZ coordinates calculator.
 69              /// </summary>
 70              public readonly OffsetCalculator Calculator;
 71  
 72              /// <summary>
 73              /// Creates texture parameters.
 74              /// </summary>
 75              /// <param name="regionX">Copy region X coordinate</param>
 76              /// <param name="regionY">Copy region Y coordinate</param>
 77              /// <param name="baseOffset">Offset from the base pointer of the data in memory</param>
 78              /// <param name="bpp">Bytes per pixel</param>
 79              /// <param name="linear">Whether the texture is linear. If false, the texture is block linear</param>
 80              /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param>
 81              public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
 82              {
 83                  RegionX = regionX;
 84                  RegionY = regionY;
 85                  BaseOffset = baseOffset;
 86                  Bpp = bpp;
 87                  Linear = linear;
 88                  Calculator = calculator;
 89              }
 90          }
 91  
        /// <summary>
        /// Three byte value with no padding, used as the element type when copying 3 byte components.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)]
        private struct UInt24
        {
            // First byte in memory order.
            public byte Byte0;
            // Second byte in memory order.
            public byte Byte1;
            // Third byte in memory order.
            public byte Byte2;
        }
 99  
100          /// <summary>
101          /// Creates a new instance of the DMA copy engine class.
102          /// </summary>
103          /// <param name="context">GPU context</param>
104          /// <param name="channel">GPU channel</param>
105          /// <param name="threedEngine">3D engine</param>
106          public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
107          {
108              _context = context;
109              _channel = channel;
110              _3dEngine = threedEngine;
111              _state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback>
112              {
113                  { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) },
114              });
115          }
116  
117          /// <summary>
118          /// Reads data from the class registers.
119          /// </summary>
120          /// <param name="offset">Register byte offset</param>
121          /// <returns>Data at the specified offset</returns>
122          public int Read(int offset) => _state.Read(offset);
123  
124          /// <summary>
125          /// Writes data to the class registers.
126          /// </summary>
127          /// <param name="offset">Register byte offset</param>
128          /// <param name="data">Data to be written</param>
129          public void Write(int offset, int data) => _state.Write(offset, data);
130  
131          /// <summary>
132          /// Determine if a buffer-to-texture region covers the entirety of a texture.
133          /// </summary>
134          /// <param name="tex">Texture to compare</param>
135          /// <param name="linear">True if the texture is linear, false if block linear</param>
136          /// <param name="bpp">Texture bytes per pixel</param>
137          /// <param name="stride">Texture stride</param>
138          /// <param name="xCount">Number of pixels to be copied</param>
139          /// <param name="yCount">Number of lines to be copied</param>
140          /// <returns></returns>
141          private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
142          {
143              if (linear)
144              {
145                  // If the stride is negative, the texture has to be flipped, so
146                  // the fast copy is not trivial, use the slow path.
147                  if (stride <= 0)
148                  {
149                      return false;
150                  }
151  
152                  int alignWidth = Constants.StrideAlignment / bpp;
153                  return stride / bpp == BitUtils.AlignUp(xCount, alignWidth);
154              }
155              else
156              {
157                  int alignWidth = Constants.GobAlignment / bpp;
158                  return tex.RegionX == 0 &&
159                         tex.RegionY == 0 &&
160                         tex.Width == BitUtils.AlignUp(xCount, alignWidth) &&
161                         tex.Height == yCount;
162              }
163          }
164  
165          /// <summary>
166          /// Releases a semaphore for a given LaunchDma method call.
167          /// </summary>
168          /// <param name="argument">The LaunchDma call argument</param>
169          private void ReleaseSemaphore(int argument)
170          {
171              LaunchDmaSemaphoreType type = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);
172              if (type != LaunchDmaSemaphoreType.None)
173              {
174                  ulong address = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
175                  if (type == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
176                  {
177                      _channel.MemoryManager.Write(address, _state.State.SetSemaphorePayload);
178                  }
179                  else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */
180                  {
181                      _channel.MemoryManager.Write(address + 8, _context.GetTimestamp());
182                      _channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload);
183                  }
184              }
185          }
186  
187          /// <summary>
188          /// Performs a buffer to buffer, or buffer to texture copy.
189          /// </summary>
190          /// <param name="argument">The LaunchDma call argument</param>
191          private void DmaCopy(int argument)
192          {
193              var memoryManager = _channel.MemoryManager;
194  
195              CopyFlags copyFlags = (CopyFlags)argument;
196  
197              bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
198              bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
199              bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
200              bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);
201  
202              uint size = _state.State.LineLengthIn;
203  
204              if (size == 0)
205              {
206                  return;
207              }
208  
209              ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
210              ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;
211  
212              int xCount = (int)_state.State.LineLengthIn;
213              int yCount = (int)_state.State.LineCount;
214  
215              _channel.TextureManager.RefreshModifiedTextures();
216              _3dEngine.CreatePendingSyncs();
217              _3dEngine.FlushUboDirty();
218  
219              if (copy2D)
220              {
221                  // Buffer to texture copy.
222                  int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
223                  int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
224                  int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
225                  int srcBpp = remap ? srcComponents * componentSize : 1;
226                  int dstBpp = remap ? dstComponents * componentSize : 1;
227  
228                  var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
229                  var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);
230  
231                  int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;
232  
233                  if (!srcLinear)
234                  {
235                      srcRegionX = src.RegionX;
236                      srcRegionY = src.RegionY;
237                  }
238  
239                  if (!dstLinear)
240                  {
241                      dstRegionX = dst.RegionX;
242                      dstRegionY = dst.RegionY;
243                  }
244  
245                  int srcStride = (int)_state.State.PitchIn;
246                  int dstStride = (int)_state.State.PitchOut;
247  
248                  var srcCalculator = new OffsetCalculator(
249                      src.Width,
250                      src.Height,
251                      srcStride,
252                      srcLinear,
253                      src.MemoryLayout.UnpackGobBlocksInY(),
254                      src.MemoryLayout.UnpackGobBlocksInZ(),
255                      srcBpp);
256  
257                  var dstCalculator = new OffsetCalculator(
258                      dst.Width,
259                      dst.Height,
260                      dstStride,
261                      dstLinear,
262                      dst.MemoryLayout.UnpackGobBlocksInY(),
263                      dst.MemoryLayout.UnpackGobBlocksInZ(),
264                      dstBpp);
265  
266                  (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
267                  (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);
268  
269                  if (srcLinear && srcStride < 0)
270                  {
271                      srcBaseOffset += srcStride * (yCount - 1);
272                  }
273  
274                  if (dstLinear && dstStride < 0)
275                  {
276                      dstBaseOffset += dstStride * (yCount - 1);
277                  }
278  
279                  // If remapping is disabled, we always copy the components directly, in order.
280                  // If it's enabled, but the mapping is just XYZW, we also copy them in order.
281                  bool isIdentityRemap = !remap ||
282                      (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
283                      (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
284                      (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
285                      (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));
286  
287                  bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
288                  bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
289  
290                  // Check if the source texture exists on the GPU, if it does, do a GPU side copy.
291                  // Otherwise, we would need to flush the source texture which is costly.
292                  // We don't expect the source to be linear in such cases, as linear source usually indicates buffer or CPU written data.
293  
294                  if (completeSource && completeDest && !srcLinear && isIdentityRemap)
295                  {
296                      var source = memoryManager.Physical.TextureCache.FindTexture(
297                          memoryManager,
298                          srcGpuVa,
299                          srcBpp,
300                          srcStride,
301                          src.Height,
302                          xCount,
303                          yCount,
304                          srcLinear,
305                          src.MemoryLayout.UnpackGobBlocksInY(),
306                          src.MemoryLayout.UnpackGobBlocksInZ());
307  
308                      if (source != null && source.Height == yCount)
309                      {
310                          source.SynchronizeMemory();
311  
312                          var target = memoryManager.Physical.TextureCache.FindOrCreateTexture(
313                              memoryManager,
314                              source.Info.FormatInfo,
315                              dstGpuVa,
316                              xCount,
317                              yCount,
318                              dstStride,
319                              dstLinear,
320                              dst.MemoryLayout.UnpackGobBlocksInY(),
321                              dst.MemoryLayout.UnpackGobBlocksInZ());
322  
323                          if (source.ScaleFactor != target.ScaleFactor)
324                          {
325                              target.PropagateScale(source);
326                          }
327  
328                          source.HostTexture.CopyTo(target.HostTexture, 0, 0);
329                          target.SignalModified();
330                          return;
331                      }
332                  }
333  
334                  ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);
335  
336                  // Try to set the texture data directly,
337                  // but only if we are doing a complete copy,
338                  // and not for block linear to linear copies, since those are typically accessed from the CPU.
339  
340                  if (completeSource && completeDest && !(dstLinear && !srcLinear) && isIdentityRemap)
341                  {
342                      var target = memoryManager.Physical.TextureCache.FindTexture(
343                          memoryManager,
344                          dstGpuVa,
345                          dstBpp,
346                          dstStride,
347                          dst.Height,
348                          xCount,
349                          yCount,
350                          dstLinear,
351                          dst.MemoryLayout.UnpackGobBlocksInY(),
352                          dst.MemoryLayout.UnpackGobBlocksInZ());
353  
354                      if (target != null)
355                      {
356                          MemoryOwner<byte> data;
357                          if (srcLinear)
358                          {
359                              data = LayoutConverter.ConvertLinearStridedToLinear(
360                                  target.Info.Width,
361                                  target.Info.Height,
362                                  1,
363                                  1,
364                                  xCount * srcBpp,
365                                  srcStride,
366                                  target.Info.FormatInfo.BytesPerPixel,
367                                  srcSpan);
368                          }
369                          else
370                          {
371                              data = LayoutConverter.ConvertBlockLinearToLinear(
372                                  src.Width,
373                                  src.Height,
374                                  src.Depth,
375                                  1,
376                                  1,
377                                  1,
378                                  1,
379                                  1,
380                                  srcBpp,
381                                  src.MemoryLayout.UnpackGobBlocksInY(),
382                                  src.MemoryLayout.UnpackGobBlocksInZ(),
383                                  1,
384                                  new SizeInfo((int)target.Size),
385                                  srcSpan);
386                          }
387  
388                          target.SynchronizeMemory();
389                          target.SetData(data);
390                          target.SignalModified();
391                          return;
392                      }
393                      else if (srcCalculator.LayoutMatches(dstCalculator))
394                      {
395                          // No layout conversion has to be performed, just copy the data entirely.
396                          memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
397                          return;
398                      }
399                  }
400  
401                  // OPT: This allocates a (potentially) huge temporary array and then copies an existing
402                  // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
403                  // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
404                  Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();
405  
406                  TextureParams srcParams = new(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
407                  TextureParams dstParams = new(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);
408  
409                  if (isIdentityRemap)
410                  {
411                      // The order of the components doesn't change, so we can just copy directly
412                      // (with layout conversion if necessary).
413  
414                      switch (srcBpp)
415                      {
416                          case 1:
417                              Copy<byte>(dstSpan, srcSpan, dstParams, srcParams);
418                              break;
419                          case 2:
420                              Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams);
421                              break;
422                          case 4:
423                              Copy<uint>(dstSpan, srcSpan, dstParams, srcParams);
424                              break;
425                          case 8:
426                              Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams);
427                              break;
428                          case 12:
429                              Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams);
430                              break;
431                          case 16:
432                              Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams);
433                              break;
434                          default:
435                              throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.");
436                      }
437                  }
438                  else
439                  {
440                      // The order or value of the components might change.
441  
442                      switch (componentSize)
443                      {
444                          case 1:
445                              CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams);
446                              break;
447                          case 2:
448                              CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams);
449                              break;
450                          case 3:
451                              CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams);
452                              break;
453                          case 4:
454                              CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams);
455                              break;
456                          default:
457                              throw new NotSupportedException($"Unable to copy ${componentSize} component size.");
458                      }
459                  }
460  
461                  memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
462              }
463              else
464              {
465                  if (remap &&
466                      _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
467                      _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
468                      _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
469                      _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
470                      _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
471                      _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
472                      _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
473                  {
474                      // Fast path for clears when remap is enabled.
475                      memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
476                  }
477                  else
478                  {
479                      // TODO: Implement remap functionality.
480                      // Buffer to buffer copy.
481  
482                      bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
483                      bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();
484  
485                      if (!srcIsPitchKind && dstIsPitchKind)
486                      {
487                          CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
488                      }
489                      else if (srcIsPitchKind && !dstIsPitchKind)
490                      {
491                          CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
492                      }
493                      else
494                      {
495                          memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
496                      }
497                  }
498              }
499          }
500  
501          /// <summary>
502          /// Copies data from one texture to another, while performing layout conversion if necessary.
503          /// </summary>
504          /// <typeparam name="T">Pixel type</typeparam>
505          /// <param name="dstSpan">Destination texture memory region</param>
506          /// <param name="srcSpan">Source texture memory region</param>
507          /// <param name="dst">Destination texture parameters</param>
508          /// <param name="src">Source texture parameters</param>
509          private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
510          {
511              int xCount = (int)_state.State.LineLengthIn;
512              int yCount = (int)_state.State.LineCount;
513  
514              if (src.Linear && dst.Linear && src.Bpp == dst.Bpp)
515              {
516                  // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
517                  // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
518                  for (int y = 0; y < yCount; y++)
519                  {
520                      src.Calculator.SetY(src.RegionY + y);
521                      dst.Calculator.SetY(dst.RegionY + y);
522                      int srcOffset = src.Calculator.GetOffset(src.RegionX);
523                      int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
524                      srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
525                          .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
526                  }
527              }
528              else
529              {
530                  fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
531                  {
532                      byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
533                      byte* srcBase = srcPtr - src.BaseOffset;
534  
535                      for (int y = 0; y < yCount; y++)
536                      {
537                          src.Calculator.SetY(src.RegionY + y);
538                          dst.Calculator.SetY(dst.RegionY + y);
539  
540                          for (int x = 0; x < xCount; x++)
541                          {
542                              int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
543                              int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
544  
545                              *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
546                          }
547                      }
548                  }
549              }
550          }
551  
552          /// <summary>
553          /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
554          /// </summary>
555          /// <typeparam name="T">Pixel type</typeparam>
556          /// <param name="dstSpan">Destination texture memory region</param>
557          /// <param name="dst">Destination texture parameters</param>
558          /// <param name="fillValue">Constant pixel value to be set</param>
559          private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged
560          {
561              int xCount = (int)_state.State.LineLengthIn;
562              int yCount = (int)_state.State.LineCount;
563  
564              fixed (byte* dstPtr = dstSpan)
565              {
566                  byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
567  
568                  for (int y = 0; y < yCount; y++)
569                  {
570                      dst.Calculator.SetY(dst.RegionY + y);
571  
572                      for (int x = 0; x < xCount; x++)
573                      {
574                          int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);
575  
576                          *(T*)(dstBase + dstOffset) = fillValue;
577                      }
578                  }
579              }
580          }
581  
582          /// <summary>
583          /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
584          /// </summary>
585          /// <typeparam name="T">Pixel type</typeparam>
586          /// <param name="dstSpan">Destination texture memory region</param>
587          /// <param name="srcSpan">Source texture memory region</param>
588          /// <param name="dst">Destination texture parameters</param>
589          /// <param name="src">Source texture parameters</param>
590          private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
591          {
592              int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
593  
594              for (int i = 0; i < dstComponents; i++)
595              {
596                  SetRemapComponentsDst componentsDst = i switch
597                  {
598                      0 => _state.State.SetRemapComponentsDstX,
599                      1 => _state.State.SetRemapComponentsDstY,
600                      2 => _state.State.SetRemapComponentsDstZ,
601                      _ => _state.State.SetRemapComponentsDstW,
602                  };
603  
604                  switch (componentsDst)
605                  {
606                      case SetRemapComponentsDst.SrcX:
607                          Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan, dst, src);
608                          break;
609                      case SetRemapComponentsDst.SrcY:
610                          Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[Unsafe.SizeOf<T>()..], dst, src);
611                          break;
612                      case SetRemapComponentsDst.SrcZ:
613                          Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[(Unsafe.SizeOf<T>() * 2)..], dst, src);
614                          break;
615                      case SetRemapComponentsDst.SrcW:
616                          Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[(Unsafe.SizeOf<T>() * 3)..], dst, src);
617                          break;
618                      case SetRemapComponentsDst.ConstA:
619                          Fill<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
620                          break;
621                      case SetRemapComponentsDst.ConstB:
622                          Fill<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
623                          break;
624                  }
625              }
626          }
627  
628          /// <summary>
629          /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
630          /// </summary>
631          /// <param name="memoryManager">GPU memory manager</param>
632          /// <param name="srcGpuVa">Source GPU virtual address</param>
633          /// <param name="dstGpuVa">Destination GPU virtual address</param>
634          /// <param name="size">Size in bytes of the copy</param>
635          private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
636          {
637              if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
638              {
639                  for (ulong offset = 0; offset < size; offset += 16)
640                  {
641                      Vector128<byte> data = memoryManager.Read<Vector128<byte>>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
642                      memoryManager.Write(dstGpuVa + offset, data);
643                  }
644              }
645              else
646              {
647                  for (ulong offset = 0; offset < size; offset++)
648                  {
649                      byte data = memoryManager.Read<byte>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
650                      memoryManager.Write(dstGpuVa + offset, data);
651                  }
652              }
653          }
654  
655          /// <summary>
656          /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
657          /// </summary>
658          /// <param name="memoryManager">GPU memory manager</param>
659          /// <param name="srcGpuVa">Source GPU virtual address</param>
660          /// <param name="dstGpuVa">Destination GPU virtual address</param>
661          /// <param name="size">Size in bytes of the copy</param>
662          private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
663          {
664              if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
665              {
666                  for (ulong offset = 0; offset < size; offset += 16)
667                  {
668                      Vector128<byte> data = memoryManager.Read<Vector128<byte>>(srcGpuVa + offset, true);
669                      memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
670                  }
671              }
672              else
673              {
674                  for (ulong offset = 0; offset < size; offset++)
675                  {
676                      byte data = memoryManager.Read<byte>(srcGpuVa + offset, true);
677                      memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
678                  }
679              }
680          }
681  
682          /// <summary>
683          /// Calculates the GOB block linear address from a linear address.
684          /// </summary>
685          /// <param name="address">Linear address</param>
686          /// <returns>Block linear address</returns>
687          private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
688          {
689              // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
690              return (address & ~0x1f0UL) |
691                  ((address & 0x40) >> 2) |
692                  ((address & 0x10) << 1) |
693                  ((address & 0x180) >> 1) |
694                  ((address & 0x20) << 3);
695          }
696  
697          /// <summary>
698          /// Performs a buffer to buffer, or buffer to texture copy, then optionally releases a semaphore.
699          /// </summary>
700          /// <param name="argument">Method call argument</param>
701          private void LaunchDma(int argument)
702          {
703              DmaCopy(argument);
704              ReleaseSemaphore(argument);
705          }
706      }
707  }