SurfaceWriter.cs
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;
using static Ryujinx.Graphics.Nvdec.MemoryExtensions;

namespace Ryujinx.Graphics.Nvdec.Image
{
    /// <summary>
    /// Copies the Y, U and V planes of an <see cref="ISurface"/> into device memory,
    /// converting them from a linear (row/stride) layout to a block linear layout.
    /// Luma is written as a single-byte plane; chroma is written as interleaved U,V byte pairs.
    /// </summary>
    static class SurfaceWriter
    {
        /// <summary>
        /// Writes a full (progressive) frame: one luma region and one interleaved chroma region.
        /// </summary>
        /// <param name="mm">Memory manager used to obtain the writable destination regions</param>
        /// <param name="surface">Surface providing the source planes, strides and dimensions</param>
        /// <param name="lumaOffset">Luma destination offset, passed through <c>ExtendOffset</c> before use</param>
        /// <param name="chromaOffset">Chroma destination offset, passed through <c>ExtendOffset</c> before use</param>
        public static void Write(DeviceMemoryManager mm, ISurface surface, uint lumaOffset, uint chromaOffset)
        {
            // Last argument is 1 for luma and 2 for chroma — presumably bytes per element (Y vs. UV pair); confirm in SurfaceCommon.
            int lumaByteCount = GetBlockLinearSize(surface.Width, surface.Height, 1);

            using var lumaRegion = mm.GetWritableRegion(ExtendOffset(lumaOffset), lumaByteCount);

            WriteLuma(lumaRegion.Memory.Span, surface.YPlane.AsSpan(), surface.Stride, surface.Width, surface.Height);

            int chromaByteCount = GetBlockLinearSize(surface.UvWidth, surface.UvHeight, 2);

            using var chromaRegion = mm.GetWritableRegion(ExtendOffset(chromaOffset), chromaByteCount);

            WriteChroma(
                chromaRegion.Memory.Span,
                surface.UPlane.AsSpan(),
                surface.VPlane.AsSpan(),
                surface.UvStride,
                surface.UvWidth,
                surface.UvHeight);
        }

        /// <summary>
        /// Writes a frame as two separate fields. The "top" regions receive source rows 0, 2, 4, ...
        /// and the "bottom" regions receive source rows 1, 3, 5, ...; each field has half the plane height.
        /// </summary>
        /// <param name="mm">Memory manager used to obtain the writable destination regions</param>
        /// <param name="surface">Surface providing the source planes, strides and dimensions</param>
        /// <param name="lumaTopOffset">Destination offset of the top field luma</param>
        /// <param name="chromaTopOffset">Destination offset of the top field chroma</param>
        /// <param name="lumaBottomOffset">Destination offset of the bottom field luma</param>
        /// <param name="chromaBottomOffset">Destination offset of the bottom field chroma</param>
        public static void WriteInterlaced(
            DeviceMemoryManager mm,
            ISurface surface,
            uint lumaTopOffset,
            uint chromaTopOffset,
            uint lumaBottomOffset,
            uint chromaBottomOffset)
        {
            int lumaFieldHeight = surface.Height / 2;
            int lumaFieldBytes = GetBlockLinearSize(surface.Width, lumaFieldHeight, 1);

            using var lumaTop = mm.GetWritableRegion(ExtendOffset(lumaTopOffset), lumaFieldBytes);
            using var lumaBottom = mm.GetWritableRegion(ExtendOffset(lumaBottomOffset), lumaFieldBytes);

            // Doubling the stride makes the converter step over every other source row.
            int lumaFieldStride = surface.Stride * 2;

            WriteLuma(lumaTop.Memory.Span, surface.YPlane.AsSpan(), lumaFieldStride, surface.Width, lumaFieldHeight);

            // Starting one row in selects the odd rows for the bottom field.
            WriteLuma(
                lumaBottom.Memory.Span,
                surface.YPlane.AsSpan()[surface.Stride..],
                lumaFieldStride,
                surface.Width,
                lumaFieldHeight);

            int chromaFieldHeight = surface.UvHeight / 2;
            int chromaFieldBytes = GetBlockLinearSize(surface.UvWidth, chromaFieldHeight, 2);

            using var chromaTop = mm.GetWritableRegion(ExtendOffset(chromaTopOffset), chromaFieldBytes);
            using var chromaBottom = mm.GetWritableRegion(ExtendOffset(chromaBottomOffset), chromaFieldBytes);

            int chromaFieldStride = surface.UvStride * 2;

            WriteChroma(
                chromaTop.Memory.Span,
                surface.UPlane.AsSpan(),
                surface.VPlane.AsSpan(),
                chromaFieldStride,
                surface.UvWidth,
                chromaFieldHeight);

            WriteChroma(
                chromaBottom.Memory.Span,
                surface.UPlane.AsSpan()[surface.UvStride..],
                surface.VPlane.AsSpan()[surface.UvStride..],
                chromaFieldStride,
                surface.UvWidth,
                chromaFieldHeight);
        }

        /// <summary>
        /// Converts a linear luma plane to block linear by delegating to <see cref="LayoutConverter"/>.
        /// </summary>
        /// <param name="dst">Destination block linear buffer</param>
        /// <param name="src">Source linear luma data</param>
        /// <param name="srcStride">Distance in bytes between the start of consecutive source rows</param>
        /// <param name="width">Plane width</param>
        /// <param name="height">Plane height</param>
        private static void WriteLuma(Span<byte> dst, ReadOnlySpan<byte> src, int srcStride, int width, int height) =>
            // The literals 1 and 2 are presumably bytes-per-pixel and GOB blocks in Y — confirm against LayoutConverter.
            LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src);

        /// <summary>
        /// Interleaves separate U and V planes into U,V byte pairs and stores them in <paramref name="dst"/>
        /// at the positions given by an <see cref="OffsetCalculator"/>.
        /// </summary>
        /// <remarks>
        /// When SSE2 is available, each row is processed 32 U/V pairs (64 output bytes) at a time,
        /// with a per-pair loop for the remainder of the row. Otherwise every pair is copied individually.
        /// The SSE2 path reads and writes through raw pointers without bounds checks.
        /// </remarks>
        /// <param name="dst">Destination buffer</param>
        /// <param name="srcU">Source U plane</param>
        /// <param name="srcV">Source V plane</param>
        /// <param name="srcStride">Distance in bytes between the start of consecutive rows in each source plane</param>
        /// <param name="width">Number of U/V pairs per row</param>
        /// <param name="height">Number of rows</param>
        private static unsafe void WriteChroma(
            Span<byte> dst,
            ReadOnlySpan<byte> srcU,
            ReadOnlySpan<byte> srcV,
            int srcStride,
            int width,
            int height)
        {
            // NOTE(review): constructor arguments after width/height are opaque here — verify against OffsetCalculator.
            OffsetCalculator calc = new(width, height, 0, false, 2, 2);

            if (!Sse2.IsSupported)
            {
                // Scalar fallback: one U/V pair per iteration, fully bounds checked.
                for (int row = 0; row < height; row++)
                {
                    int rowStart = row * srcStride;

                    calc.SetY(row);

                    for (int col = 0; col < width; col++)
                    {
                        int dstIndex = calc.GetOffset(col);

                        dst[dstIndex] = srcU[rowStart + col];
                        dst[dstIndex + 1] = srcV[rowStart + col];
                    }
                }

                return;
            }

            // Number of output bytes per row that can be produced in whole 64-byte steps.
            int vectorBytes = BitUtils.AlignDown(width * 2, 64);

            // First pair index not covered by the vector loop (two output bytes per pair).
            int tailStart = vectorBytes / 2;

            // Bytes to skip at the end of each source row to reach the next one.
            int rowPadding = srcStride - width;

            fixed (byte* dstBase = dst, uBase = srcU, vBase = srcV)
            {
                byte* uCursor = uBase;
                byte* vCursor = vBase;

                for (int row = 0; row < height; row++)
                {
                    calc.SetY(row);

                    for (int outX = 0; outX < vectorBytes; outX += 64)
                    {
                        byte* block = dstBase + calc.GetOffsetWithLineOffset64(outX);

                        Vector128<byte> uLow = *(Vector128<byte>*)uCursor;
                        Vector128<byte> vLow = *(Vector128<byte>*)vCursor;
                        Vector128<byte> uHigh = *(Vector128<byte>*)(uCursor + 16);
                        Vector128<byte> vHigh = *(Vector128<byte>*)(vCursor + 16);

                        // The four 16-byte pieces of the 64-byte run are stored at +0x0, +0x20, +0x100 and +0x120;
                        // presumably where consecutive 16-byte segments of a row sit in the block linear layout — confirm.
                        *(Vector128<byte>*)block = Sse2.UnpackLow(uLow, vLow);
                        *(Vector128<byte>*)(block + 0x20) = Sse2.UnpackHigh(uLow, vLow);
                        *(Vector128<byte>*)(block + 0x100) = Sse2.UnpackLow(uHigh, vHigh);
                        *(Vector128<byte>*)(block + 0x120) = Sse2.UnpackHigh(uHigh, vHigh);

                        uCursor += 32;
                        vCursor += 32;
                    }

                    // Remaining pairs that do not fill a whole 64-byte step.
                    for (int col = tailStart; col < width; col++)
                    {
                        byte* pair = dstBase + calc.GetOffset(col);

                        pair[0] = *uCursor++;
                        pair[1] = *vCursor++;
                    }

                    uCursor += rowPadding;
                    vCursor += rowPadding;
                }
            }
        }
    }
}