SurfaceReader.cs
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;

namespace Ryujinx.Graphics.Nvdec.Image
{
    /// <summary>
    /// Copies surface data out of device memory into the Y, U and V planes of an <see cref="ISurface"/>.
    /// </summary>
    static class SurfaceReader
    {
        /// <summary>
        /// Reads the luma and chroma data of a surface from device memory into its planes.
        /// </summary>
        /// <param name="mm">Device memory manager the source data is fetched from</param>
        /// <param name="surface">Surface providing the dimensions, strides and destination planes</param>
        /// <param name="lumaOffset">Device memory offset of the luma data</param>
        /// <param name="chromaOffset">Device memory offset of the chroma data (U and V bytes interleaved)</param>
        public static void Read(DeviceMemoryManager mm, ISurface surface, uint lumaOffset, uint chromaOffset)
        {
            int lumaWidth = surface.Width;
            int lumaHeight = surface.Height;
            int lumaStride = surface.Stride;

            // Last argument of GetBlockLinearSize is presumably the bytes per pixel (1 for luma) - confirm in SurfaceCommon.
            ReadOnlySpan<byte> lumaSource = mm.DeviceGetSpan(lumaOffset, GetBlockLinearSize(lumaWidth, lumaHeight, 1));

            ReadLuma(surface.YPlane.AsSpan(), lumaSource, lumaStride, lumaWidth, lumaHeight);

            int chromaWidth = surface.UvWidth;
            int chromaHeight = surface.UvHeight;
            int chromaStride = surface.UvStride;

            // Each chroma pixel takes two source bytes: one for U, one for V.
            ReadOnlySpan<byte> chromaSource = mm.DeviceGetSpan(chromaOffset, GetBlockLinearSize(chromaWidth, chromaHeight, 2));

            ReadChroma(surface.UPlane.AsSpan(), surface.VPlane.AsSpan(), chromaSource, chromaStride, chromaWidth, chromaHeight);
        }

        /// <summary>
        /// Converts the block linear luma data into the linear destination plane.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the literal arguments 1 and 2 are presumably the bytes per pixel and the
        /// GOB blocks in Y expected by <see cref="LayoutConverter"/> - confirm against its signature.
        /// </remarks>
        private static void ReadLuma(Span<byte> dst, ReadOnlySpan<byte> src, int dstStride, int width, int height)
            => LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, 1, 2, src);

        /// <summary>
        /// Splits the interleaved chroma source into separate U and V planes,
        /// using the SSE2 path when the CPU supports it and a per-pixel loop otherwise.
        /// </summary>
        /// <param name="dstU">Destination U plane</param>
        /// <param name="dstV">Destination V plane</param>
        /// <param name="src">Source data, addressed through <see cref="OffsetCalculator"/></param>
        /// <param name="dstStride">Distance in bytes between two rows of a destination plane</param>
        /// <param name="width">Chroma width in pixels</param>
        /// <param name="height">Chroma height in pixels</param>
        private static void ReadChroma(
            Span<byte> dstU,
            Span<byte> dstV,
            ReadOnlySpan<byte> src,
            int dstStride,
            int width,
            int height)
        {
            // NOTE(review): arguments are presumably (width, height, stride, isLinear, gobBlocksInY, bytesPerPixel) - confirm.
            OffsetCalculator calc = new(width, height, 0, false, 2, 2);

            if (Sse2.IsSupported)
            {
                ReadChromaSse2(dstU, dstV, src, calc, dstStride, width, height);
            }
            else
            {
                ReadChromaScalar(dstU, dstV, src, calc, dstStride, width, height);
            }
        }

        /// <summary>
        /// SSE2 chroma path: handles 64 source bytes (32 pixels) per iteration and
        /// finishes each row with a per-pixel loop for the remaining pixels.
        /// </summary>
        private static unsafe void ReadChromaSse2(
            Span<byte> dstU,
            Span<byte> dstV,
            ReadOnlySpan<byte> src,
            OffsetCalculator calc,
            int dstStride,
            int width,
            int height)
        {
            // Number of source bytes per row that can be consumed in whole 64-byte steps.
            int vectorBytes = BitUtils.AlignDown(width * 2, 64);

            // Two source bytes per pixel, so this is the first pixel the vector loop did not cover.
            int firstTailPixel = vectorBytes / 2;

            // The output pointers advance by "width" over a row; this skips the rest of the destination stride.
            int rowPadding = dstStride - width;

            fixed (byte* uBase = dstU, vBase = dstV, srcBase = src)
            {
                byte* uOut = uBase;
                byte* vOut = vBase;

                for (int row = 0; row < height; row++)
                {
                    calc.SetY(row);

                    for (int byteX = 0; byteX < vectorBytes; byteX += 64)
                    {
                        // NOTE(review): the four 16-byte pieces of the 64-byte row segment are presumably
                        // located at +0x00, +0x20, +0x100 and +0x120 by the block linear layout - confirm.
                        byte* chunk = srcBase + calc.GetOffsetWithLineOffset64(byteX);

                        Vector128<byte> uv0 = *(Vector128<byte>*)chunk;
                        Vector128<byte> uv1 = *(Vector128<byte>*)(chunk + 0x20);
                        Vector128<byte> uv2 = *(Vector128<byte>*)(chunk + 0x100);
                        Vector128<byte> uv3 = *(Vector128<byte>*)(chunk + 0x120);

                        // Four rounds of byte interleaving move the even bytes (U) of each vector pair
                        // into the "low" result and the odd bytes (V) into the "high" result.
                        Vector128<byte> s1Lo0 = Sse2.UnpackLow(uv0, uv1);
                        Vector128<byte> s1Hi0 = Sse2.UnpackHigh(uv0, uv1);
                        Vector128<byte> s1Lo1 = Sse2.UnpackLow(uv2, uv3);
                        Vector128<byte> s1Hi1 = Sse2.UnpackHigh(uv2, uv3);

                        Vector128<byte> s2Lo0 = Sse2.UnpackLow(s1Lo0, s1Hi0);
                        Vector128<byte> s2Hi0 = Sse2.UnpackHigh(s1Lo0, s1Hi0);
                        Vector128<byte> s2Lo1 = Sse2.UnpackLow(s1Lo1, s1Hi1);
                        Vector128<byte> s2Hi1 = Sse2.UnpackHigh(s1Lo1, s1Hi1);

                        Vector128<byte> s3Lo0 = Sse2.UnpackLow(s2Lo0, s2Hi0);
                        Vector128<byte> s3Hi0 = Sse2.UnpackHigh(s2Lo0, s2Hi0);
                        Vector128<byte> s3Lo1 = Sse2.UnpackLow(s2Lo1, s2Hi1);
                        Vector128<byte> s3Hi1 = Sse2.UnpackHigh(s2Lo1, s2Hi1);

                        Vector128<byte> uFirst = Sse2.UnpackLow(s3Lo0, s3Hi0);
                        Vector128<byte> vFirst = Sse2.UnpackHigh(s3Lo0, s3Hi0);
                        Vector128<byte> uSecond = Sse2.UnpackLow(s3Lo1, s3Hi1);
                        Vector128<byte> vSecond = Sse2.UnpackHigh(s3Lo1, s3Hi1);

                        *(Vector128<byte>*)uOut = uFirst;
                        *(Vector128<byte>*)(uOut + 16) = uSecond;
                        *(Vector128<byte>*)vOut = vFirst;
                        *(Vector128<byte>*)(vOut + 16) = vSecond;

                        uOut += 32;
                        vOut += 32;
                    }

                    // Remaining pixels of the row, one U/V pair at a time.
                    for (int pixel = firstTailPixel; pixel < width; pixel++)
                    {
                        byte* pair = srcBase + calc.GetOffset(pixel);

                        *uOut++ = pair[0];
                        *vOut++ = pair[1];
                    }

                    uOut += rowPadding;
                    vOut += rowPadding;
                }
            }
        }

        /// <summary>
        /// Fallback chroma path: copies one U/V pair per pixel using bounds-checked span accesses.
        /// </summary>
        private static void ReadChromaScalar(
            Span<byte> dstU,
            Span<byte> dstV,
            ReadOnlySpan<byte> src,
            OffsetCalculator calc,
            int dstStride,
            int width,
            int height)
        {
            for (int row = 0; row < height; row++)
            {
                int rowStart = row * dstStride;

                calc.SetY(row);

                for (int pixel = 0; pixel < width; pixel++)
                {
                    int srcIndex = calc.GetOffset(pixel);
                    int dstIndex = rowStart + pixel;

                    // U is the first byte of the pair, V the second.
                    dstU[dstIndex] = src[srcIndex];
                    dstV[dstIndex] = src[srcIndex + 1];
                }
            }
        }
    }
}