DmaClass.cs
using Ryujinx.Common;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Ryujinx.Graphics.Gpu.Engine.Dma
{
    /// <summary>
    /// Represents a DMA copy engine class.
    /// </summary>
    class DmaClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly ThreedClass _3dEngine;
        private readonly DeviceState<DmaClassState> _state;

        /// <summary>
        /// Copy flags passed on DMA launch.
        /// </summary>
        [Flags]
        private enum CopyFlags
        {
            SrcLinear = 1 << 7,
            DstLinear = 1 << 8,
            MultiLineEnable = 1 << 9,
            RemapEnable = 1 << 10,
        }

        /// <summary>
        /// Texture parameters for copy.
        /// </summary>
        private readonly struct TextureParams
        {
            /// <summary>
            /// Copy region X coordinate.
            /// </summary>
            public readonly int RegionX;

            /// <summary>
            /// Copy region Y coordinate.
            /// </summary>
            public readonly int RegionY;

            /// <summary>
            /// Offset from the base pointer of the data in memory.
            /// </summary>
            public readonly int BaseOffset;

            /// <summary>
            /// Bytes per pixel.
            /// </summary>
            public readonly int Bpp;

            /// <summary>
            /// Whether the texture is linear. If false, the texture is block linear.
            /// </summary>
            public readonly bool Linear;

            /// <summary>
            /// Pixel offset from XYZ coordinates calculator.
            /// </summary>
            public readonly OffsetCalculator Calculator;

            /// <summary>
            /// Creates texture parameters.
            /// </summary>
            /// <param name="regionX">Copy region X coordinate</param>
            /// <param name="regionY">Copy region Y coordinate</param>
            /// <param name="baseOffset">Offset from the base pointer of the data in memory</param>
            /// <param name="bpp">Bytes per pixel</param>
            /// <param name="linear">Whether the texture is linear. If false, the texture is block linear</param>
            /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param>
            public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
            {
                RegionX = regionX;
                RegionY = regionY;
                BaseOffset = baseOffset;
                Bpp = bpp;
                Linear = linear;
                Calculator = calculator;
            }
        }

        /// <summary>
        /// Tightly packed 3 byte value, used as the element type when shuffling components with a component size of 3.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)]
        private struct UInt24
        {
            public byte Byte0;
            public byte Byte1;
            public byte Byte2;
        }

        /// <summary>
        /// Creates a new instance of the DMA copy engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="threedEngine">3D engine</param>
        public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
        {
            _context = context;
            _channel = channel;
            _3dEngine = threedEngine;
            _state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback>
            {
                // Writing the LaunchDma register is what triggers the copy.
                { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) },
            });
        }

        /// <summary>
        /// Reads data from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset) => _state.Read(offset);

        /// <summary>
        /// Writes data to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data) => _state.Write(offset, data);

        /// <summary>
        /// Determine if a buffer-to-texture region covers the entirety of a texture.
        /// </summary>
        /// <param name="tex">Texture to compare</param>
        /// <param name="linear">True if the texture is linear, false if block linear</param>
        /// <param name="bpp">Texture bytes per pixel</param>
        /// <param name="stride">Texture stride</param>
        /// <param name="xCount">Number of pixels to be copied</param>
        /// <param name="yCount">Number of lines to be copied</param>
        /// <returns>True if the copy region covers the whole texture, false otherwise</returns>
        private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
        {
            if (linear)
            {
                // If the stride is zero or negative (a negative stride means the texture
                // has to be flipped), the fast copy is not trivial, use the slow path.
                if (stride <= 0)
                {
                    return false;
                }

                int alignWidth = Constants.StrideAlignment / bpp;
                return stride / bpp == BitUtils.AlignUp(xCount, alignWidth);
            }
            else
            {
                int alignWidth = Constants.GobAlignment / bpp;
                return tex.RegionX == 0 &&
                       tex.RegionY == 0 &&
                       tex.Width == BitUtils.AlignUp(xCount, alignWidth) &&
                       tex.Height == yCount;
            }
        }

        /// <summary>
        /// Releases a semaphore for a given LaunchDma method call.
        /// </summary>
        /// <remarks>
        /// The semaphore type is taken from bits 3-4 of the argument. Nothing is written when the type is None.
        /// </remarks>
        /// <param name="argument">The LaunchDma call argument</param>
        private void ReleaseSemaphore(int argument)
        {
            LaunchDmaSemaphoreType type = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);
            if (type != LaunchDmaSemaphoreType.None)
            {
                ulong address = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
                if (type == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
                {
                    _channel.MemoryManager.Write(address, _state.State.SetSemaphorePayload);
                }
                else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */
                {
                    // The timestamp is written first, at offset 8, followed by the payload widened to 64 bits.
                    _channel.MemoryManager.Write(address + 8, _context.GetTimestamp());
                    _channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload);
                }
            }
        }

        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy.
        /// </summary>
        /// <remarks>
        /// Nothing is done when the line length is zero.
        /// </remarks>
        /// <param name="argument">The LaunchDma call argument</param>
        /// <exception cref="NotSupportedException">Thrown when the pixel or component size of a 2D copy is not supported</exception>
        private void DmaCopy(int argument)
        {
            var memoryManager = _channel.MemoryManager;

            CopyFlags copyFlags = (CopyFlags)argument;

            bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
            bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
            bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
            bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);

            uint size = _state.State.LineLengthIn;

            if (size == 0)
            {
                return;
            }

            ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
            ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;

            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            _channel.TextureManager.RefreshModifiedTextures();
            _3dEngine.CreatePendingSyncs();
            _3dEngine.FlushUboDirty();

            if (copy2D)
            {
                // Buffer to texture copy.
                int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
                int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
                int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
                int srcBpp = remap ? srcComponents * componentSize : 1;
                int dstBpp = remap ? dstComponents * componentSize : 1;

                var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
                var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);

                int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;

                // The region origin is only taken from the texture state for block linear textures.
                if (!srcLinear)
                {
                    srcRegionX = src.RegionX;
                    srcRegionY = src.RegionY;
                }

                if (!dstLinear)
                {
                    dstRegionX = dst.RegionX;
                    dstRegionY = dst.RegionY;
                }

                int srcStride = (int)_state.State.PitchIn;
                int dstStride = (int)_state.State.PitchOut;

                var srcCalculator = new OffsetCalculator(
                    src.Width,
                    src.Height,
                    srcStride,
                    srcLinear,
                    src.MemoryLayout.UnpackGobBlocksInY(),
                    src.MemoryLayout.UnpackGobBlocksInZ(),
                    srcBpp);

                var dstCalculator = new OffsetCalculator(
                    dst.Width,
                    dst.Height,
                    dstStride,
                    dstLinear,
                    dst.MemoryLayout.UnpackGobBlocksInY(),
                    dst.MemoryLayout.UnpackGobBlocksInZ(),
                    dstBpp);

                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);

                // With a negative stride, move the base offset back by (yCount - 1) strides.
                if (srcLinear && srcStride < 0)
                {
                    srcBaseOffset += srcStride * (yCount - 1);
                }

                if (dstLinear && dstStride < 0)
                {
                    dstBaseOffset += dstStride * (yCount - 1);
                }

                // If remapping is disabled, we always copy the components directly, in order.
                // If it's enabled, but the mapping is just XYZW, we also copy them in order.
                bool isIdentityRemap = !remap ||
                    (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
                    (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
                    (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
                    (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));

                bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
                bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);

                // Check if the source texture exists on the GPU, if it does, do a GPU side copy.
                // Otherwise, we would need to flush the source texture which is costly.
                // We don't expect the source to be linear in such cases, as linear source usually indicates buffer or CPU written data.

                if (completeSource && completeDest && !srcLinear && isIdentityRemap)
                {
                    var source = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        srcGpuVa,
                        srcBpp,
                        srcStride,
                        src.Height,
                        xCount,
                        yCount,
                        srcLinear,
                        src.MemoryLayout.UnpackGobBlocksInY(),
                        src.MemoryLayout.UnpackGobBlocksInZ());

                    if (source != null && source.Height == yCount)
                    {
                        source.SynchronizeMemory();

                        var target = memoryManager.Physical.TextureCache.FindOrCreateTexture(
                            memoryManager,
                            source.Info.FormatInfo,
                            dstGpuVa,
                            xCount,
                            yCount,
                            dstStride,
                            dstLinear,
                            dst.MemoryLayout.UnpackGobBlocksInY(),
                            dst.MemoryLayout.UnpackGobBlocksInZ());

                        if (source.ScaleFactor != target.ScaleFactor)
                        {
                            target.PropagateScale(source);
                        }

                        source.HostTexture.CopyTo(target.HostTexture, 0, 0);
                        target.SignalModified();
                        return;
                    }
                }

                ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);

                // Try to set the texture data directly,
                // but only if we are doing a complete copy,
                // and not for block linear to linear copies, since those are typically accessed from the CPU.

                if (completeSource && completeDest && !(dstLinear && !srcLinear) && isIdentityRemap)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        dstGpuVa,
                        dstBpp,
                        dstStride,
                        dst.Height,
                        xCount,
                        yCount,
                        dstLinear,
                        dst.MemoryLayout.UnpackGobBlocksInY(),
                        dst.MemoryLayout.UnpackGobBlocksInZ());

                    if (target != null)
                    {
                        MemoryOwner<byte> data;
                        if (srcLinear)
                        {
                            data = LayoutConverter.ConvertLinearStridedToLinear(
                                target.Info.Width,
                                target.Info.Height,
                                1,
                                1,
                                xCount * srcBpp,
                                srcStride,
                                target.Info.FormatInfo.BytesPerPixel,
                                srcSpan);
                        }
                        else
                        {
                            data = LayoutConverter.ConvertBlockLinearToLinear(
                                src.Width,
                                src.Height,
                                src.Depth,
                                1,
                                1,
                                1,
                                1,
                                1,
                                srcBpp,
                                src.MemoryLayout.UnpackGobBlocksInY(),
                                src.MemoryLayout.UnpackGobBlocksInZ(),
                                1,
                                new SizeInfo((int)target.Size),
                                srcSpan);
                        }

                        target.SynchronizeMemory();
                        target.SetData(data);
                        target.SignalModified();
                        return;
                    }
                    else if (srcCalculator.LayoutMatches(dstCalculator))
                    {
                        // No layout conversion has to be performed, just copy the data entirely.
                        memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
                        return;
                    }
                }

                // OPT: This allocates a (potentially) huge temporary array and then copies an existing
                // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
                // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
                Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();

                TextureParams srcParams = new(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
                TextureParams dstParams = new(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);

                if (isIdentityRemap)
                {
                    // The order of the components doesn't change, so we can just copy directly
                    // (with layout conversion if necessary).

                    switch (srcBpp)
                    {
                        case 1:
                            Copy<byte>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 2:
                            Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 4:
                            Copy<uint>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 8:
                            Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 12:
                            Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 16:
                            Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        default:
                            throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
                    }
                }
                else
                {
                    // The order or value of the components might change.

                    switch (componentSize)
                    {
                        case 1:
                            CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 2:
                            CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 3:
                            CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        case 4:
                            CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams);
                            break;
                        default:
                            throw new NotSupportedException($"Unable to copy {componentSize} component size.");
                    }
                }

                memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
            }
            else
            {
                if (remap &&
                    _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
                {
                    // Fast path for clears when remap is enabled.
                    memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
                }
                else
                {
                    // TODO: Implement remap functionality.
                    // Buffer to buffer copy.

                    bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
                    bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();

                    if (!srcIsPitchKind && dstIsPitchKind)
                    {
                        CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                    else if (srcIsPitchKind && !dstIsPitchKind)
                    {
                        CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                    else
                    {
                        memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                }
            }
        }

        /// <summary>
        /// Copies data from one texture to another, while performing layout conversion if necessary.
        /// </summary>
        /// <remarks>
        /// The number of pixels per line and the number of lines are read from the current class state.
        /// </remarks>
        /// <typeparam name="T">Pixel type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="srcSpan">Source texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="src">Source texture parameters</param>
        private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
        {
            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            if (src.Linear && dst.Linear && src.Bpp == dst.Bpp)
            {
                // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
                // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
                for (int y = 0; y < yCount; y++)
                {
                    src.Calculator.SetY(src.RegionY + y);
                    dst.Calculator.SetY(dst.RegionY + y);
                    int srcOffset = src.Calculator.GetOffset(src.RegionX);
                    int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
                    srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
                        .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
                }
            }
            else
            {
                fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                {
                    byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                    byte* srcBase = srcPtr - src.BaseOffset;

                    for (int y = 0; y < yCount; y++)
                    {
                        src.Calculator.SetY(src.RegionY + y);
                        dst.Calculator.SetY(dst.RegionY + y);

                        for (int x = 0; x < xCount; x++)
                        {
                            int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
                            int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);

                            *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
        /// </summary>
        /// <remarks>
        /// The number of pixels per line and the number of lines are read from the current class state.
        /// </remarks>
        /// <typeparam name="T">Pixel type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="fillValue">Constant pixel value to be set</param>
        private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged
        {
            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            fixed (byte* dstPtr = dstSpan)
            {
                byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.

                for (int y = 0; y < yCount; y++)
                {
                    dst.Calculator.SetY(dst.RegionY + y);

                    for (int x = 0; x < xCount; x++)
                    {
                        int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);

                        *(T*)(dstBase + dstOffset) = fillValue;
                    }
                }
            }
        }

        /// <summary>
        /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
        /// </summary>
        /// <remarks>
        /// Each destination component is written by its own pass, either copied from the selected source component
        /// or filled with one of the remap constants. Any other selector value leaves that component untouched.
        /// </remarks>
        /// <typeparam name="T">Component type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="srcSpan">Source texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="src">Source texture parameters</param>
        private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
        {
            int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;

            for (int i = 0; i < dstComponents; i++)
            {
                SetRemapComponentsDst componentsDst = i switch
                {
                    0 => _state.State.SetRemapComponentsDstX,
                    1 => _state.State.SetRemapComponentsDstY,
                    2 => _state.State.SetRemapComponentsDstZ,
                    _ => _state.State.SetRemapComponentsDstW,
                };

                // Slicing the spans by a whole number of components selects which component
                // of each pixel is read (source) and written (destination).
                switch (componentsDst)
                {
                    case SetRemapComponentsDst.SrcX:
                        Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan, dst, src);
                        break;
                    case SetRemapComponentsDst.SrcY:
                        Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[Unsafe.SizeOf<T>()..], dst, src);
                        break;
                    case SetRemapComponentsDst.SrcZ:
                        Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[(Unsafe.SizeOf<T>() * 2)..], dst, src);
                        break;
                    case SetRemapComponentsDst.SrcW:
                        Copy<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], srcSpan[(Unsafe.SizeOf<T>() * 3)..], dst, src);
                        break;
                    case SetRemapComponentsDst.ConstA:
                        Fill<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
                        break;
                    case SetRemapComponentsDst.ConstB:
                        Fill<T>(dstSpan[(Unsafe.SizeOf<T>() * i)..], dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
                        break;
                }
            }
        }

        /// <summary>
        /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
        /// </summary>
        /// <param name="memoryManager">GPU memory manager</param>
        /// <param name="srcGpuVa">Source GPU virtual address</param>
        /// <param name="dstGpuVa">Destination GPU virtual address</param>
        /// <param name="size">Size in bytes of the copy</param>
        private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
        {
            // When both addresses and the size are 16 byte aligned, copy 16 bytes at a time.
            if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
            {
                for (ulong offset = 0; offset < size; offset += 16)
                {
                    Vector128<byte> data = memoryManager.Read<Vector128<byte>>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
                    memoryManager.Write(dstGpuVa + offset, data);
                }
            }
            else
            {
                for (ulong offset = 0; offset < size; offset++)
                {
                    byte data = memoryManager.Read<byte>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
                    memoryManager.Write(dstGpuVa + offset, data);
                }
            }
        }

        /// <summary>
        /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
        /// </summary>
        /// <param name="memoryManager">GPU memory manager</param>
        /// <param name="srcGpuVa">Source GPU virtual address</param>
        /// <param name="dstGpuVa">Destination GPU virtual address</param>
        /// <param name="size">Size in bytes of the copy</param>
        private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
        {
            // When both addresses and the size are 16 byte aligned, copy 16 bytes at a time.
            if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
            {
                for (ulong offset = 0; offset < size; offset += 16)
                {
                    Vector128<byte> data = memoryManager.Read<Vector128<byte>>(srcGpuVa + offset, true);
                    memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
                }
            }
            else
            {
                for (ulong offset = 0; offset < size; offset++)
                {
                    byte data = memoryManager.Read<byte>(srcGpuVa + offset, true);
                    memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
                }
            }
        }

        /// <summary>
        /// Calculates the GOB block linear address from a linear address.
        /// </summary>
        /// <remarks>
        /// Only address bits 4 to 8 are rearranged, all other bits are preserved.
        /// </remarks>
        /// <param name="address">Linear address</param>
        /// <returns>Block linear address</returns>
        private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
        {
            // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
            return (address & ~0x1f0UL) |
                ((address & 0x40) >> 2) |
                ((address & 0x10) << 1) |
                ((address & 0x180) >> 1) |
                ((address & 0x20) << 3);
        }

        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy, then optionally releases a semaphore.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            DmaCopy(argument);
            ReleaseSemaphore(argument);
        }
    }
}