FormatConverter.cs
using Ryujinx.Common.Memory;
using System;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Ryujinx.Graphics.OpenGL.Image
{
    /// <summary>
    /// Byte-order converters between two packed 32-bit texel layouts
    /// (presumably 8-bit stencil + 24-bit depth, going by the method names).
    /// Both conversions are a rotation of every 4-byte group by one byte.
    /// </summary>
    static class FormatConverter
    {
        /// <summary>
        /// Converts every 4-byte group <c>[b0, b1, b2, b3]</c> of <paramref name="data"/>
        /// into <c>[b3, b0, b1, b2]</c>.
        /// </summary>
        /// <param name="data">Source bytes; expected to be a whole number of 4-byte texels.</param>
        /// <returns>
        /// A buffer rented with <c>MemoryOwner&lt;byte&gt;.Rent(data.Length)</c> that holds the
        /// converted bytes. NOTE(review): the caller presumably has to dispose it - confirm.
        /// </returns>
        public static MemoryOwner<byte> ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
        {
            MemoryOwner<byte> outputMemory = MemoryOwner<byte>.Rent(data.Length);

            RotateTexels(data, outputMemory.Span, rotateLeft: true);

            return outputMemory;
        }

        /// <summary>
        /// Converts every 4-byte group <c>[b0, b1, b2, b3]</c> of <paramref name="data"/>
        /// into <c>[b1, b2, b3, b0]</c> (the inverse of <see cref="ConvertS8D24ToD24S8"/>).
        /// </summary>
        /// <param name="data">Source bytes; expected to be a whole number of 4-byte texels.</param>
        /// <returns>A newly allocated array of <c>data.Length</c> bytes with the converted texels.</returns>
        public static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
        {
            byte[] output = new byte[data.Length];

            RotateTexels(data, output, rotateLeft: false);

            return output;
        }

        /// <summary>
        /// Shared implementation of both conversions: rotates each 32-bit texel of
        /// <paramref name="data"/> by 8 bits and writes it to the same position in
        /// <paramref name="output"/>.
        /// </summary>
        /// <param name="data">Source bytes.</param>
        /// <param name="output">Destination; must be at least <c>data.Length</c> bytes long.</param>
        /// <param name="rotateLeft">
        /// <c>true</c> to rotate each texel left by 8 bits, <c>false</c> to rotate it right by 8 bits.
        /// </param>
        private static unsafe void RotateTexels(ReadOnlySpan<byte> data, Span<byte> output, bool rotateLeft)
        {
            // Byte shuffle equivalent to the 8-bit rotation of four consecutive 32-bit texels.
            Vector128<byte> mask = rotateLeft
                ? Vector128.Create(
                    (byte)3, (byte)0, (byte)1, (byte)2,
                    (byte)7, (byte)4, (byte)5, (byte)6,
                    (byte)11, (byte)8, (byte)9, (byte)10,
                    (byte)15, (byte)12, (byte)13, (byte)14)
                : Vector128.Create(
                    (byte)1, (byte)2, (byte)3, (byte)0,
                    (byte)5, (byte)6, (byte)7, (byte)4,
                    (byte)9, (byte)10, (byte)11, (byte)8,
                    (byte)13, (byte)14, (byte)15, (byte)12);

            // Number of bytes already converted by a vector path.
            int processed = 0;

            fixed (byte* pInput = data, pOutput = output)
            {
                if (Avx2.IsSupported)
                {
                    // The 256-bit byte shuffle works independently on each 128-bit lane and
                    // only reads the low 4 bits of every index, so the same 16-entry mask
                    // can be used for both lanes.
                    Vector256<byte> wideMask = Vector256.Create(mask, mask);

                    int sizeAligned = data.Length & ~31;

                    for (; processed < sizeAligned; processed += 32)
                    {
                        Vector256<byte> texels = Avx.LoadVector256(pInput + processed);

                        Avx.Store(pOutput + processed, Avx2.Shuffle(texels, wideMask));
                    }
                }
                else if (Ssse3.IsSupported)
                {
                    int sizeAligned = data.Length & ~15;

                    for (; processed < sizeAligned; processed += 16)
                    {
                        Vector128<byte> texels = Sse2.LoadVector128(pInput + processed);

                        Sse2.Store(pOutput + processed, Ssse3.Shuffle(texels, mask));
                    }
                }
            }

            // Scalar path: everything when no vector path ran, otherwise the texels
            // left over after the last full vector.
            ReadOnlySpan<uint> dataWords = MemoryMarshal.Cast<byte, uint>(data);
            Span<uint> outputWords = MemoryMarshal.Cast<byte, uint>(output);

            for (int i = processed / sizeof(uint); i < dataWords.Length; i++)
            {
                uint texel = dataWords[i];

                outputWords[i] = rotateLeft
                    ? BitOperations.RotateLeft(texel, 8)
                    : BitOperations.RotateRight(texel, 8);
            }

            // A trailing partial texel (length not a multiple of 4) cannot be converted.
            // Zero it so the result is deterministic for both converters; a rented buffer
            // is presumably not cleared and could otherwise expose stale bytes.
            int converted = dataWords.Length * sizeof(uint);
            int leftover = data.Length - converted;

            if (leftover > 0)
            {
                output.Slice(converted, leftover).Clear();
            }
        }
    }
}