/ src / Ryujinx.Graphics.OpenGL / Image / FormatConverter.cs
FormatConverter.cs
  1  using Ryujinx.Common.Memory;
  2  using System;
  3  using System.Numerics;
  4  using System.Runtime.InteropServices;
  5  using System.Runtime.Intrinsics;
  6  using System.Runtime.Intrinsics.X86;
  7  
  8  namespace Ryujinx.Graphics.OpenGL.Image
  9  {
 10      static class FormatConverter
 11      {
 12          public unsafe static MemoryOwner<byte> ConvertS8D24ToD24S8(ReadOnlySpan<byte> data)
 13          {
 14              MemoryOwner<byte> outputMemory = MemoryOwner<byte>.Rent(data.Length);
 15  
 16              Span<byte> output = outputMemory.Span;
 17  
 18              int start = 0;
 19  
 20              if (Avx2.IsSupported)
 21              {
 22                  var mask = Vector256.Create(
 23                      (byte)3, (byte)0, (byte)1, (byte)2,
 24                      (byte)7, (byte)4, (byte)5, (byte)6,
 25                      (byte)11, (byte)8, (byte)9, (byte)10,
 26                      (byte)15, (byte)12, (byte)13, (byte)14,
 27                      (byte)19, (byte)16, (byte)17, (byte)18,
 28                      (byte)23, (byte)20, (byte)21, (byte)22,
 29                      (byte)27, (byte)24, (byte)25, (byte)26,
 30                      (byte)31, (byte)28, (byte)29, (byte)30);
 31  
 32                  int sizeAligned = data.Length & ~31;
 33  
 34                  fixed (byte* pInput = data, pOutput = output)
 35                  {
 36                      for (uint i = 0; i < sizeAligned; i += 32)
 37                      {
 38                          var dataVec = Avx.LoadVector256(pInput + i);
 39  
 40                          dataVec = Avx2.Shuffle(dataVec, mask);
 41  
 42                          Avx.Store(pOutput + i, dataVec);
 43                      }
 44                  }
 45  
 46                  start = sizeAligned;
 47              }
 48              else if (Ssse3.IsSupported)
 49              {
 50                  var mask = Vector128.Create(
 51                      (byte)3, (byte)0, (byte)1, (byte)2,
 52                      (byte)7, (byte)4, (byte)5, (byte)6,
 53                      (byte)11, (byte)8, (byte)9, (byte)10,
 54                      (byte)15, (byte)12, (byte)13, (byte)14);
 55  
 56                  int sizeAligned = data.Length & ~15;
 57  
 58                  fixed (byte* pInput = data, pOutput = output)
 59                  {
 60                      for (uint i = 0; i < sizeAligned; i += 16)
 61                      {
 62                          var dataVec = Sse2.LoadVector128(pInput + i);
 63  
 64                          dataVec = Ssse3.Shuffle(dataVec, mask);
 65  
 66                          Sse2.Store(pOutput + i, dataVec);
 67                      }
 68                  }
 69  
 70                  start = sizeAligned;
 71              }
 72  
 73              var outSpan = MemoryMarshal.Cast<byte, uint>(output);
 74              var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
 75              for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
 76              {
 77                  outSpan[i] = BitOperations.RotateLeft(dataSpan[i], 8);
 78              }
 79  
 80              return outputMemory;
 81          }
 82  
 83          public unsafe static byte[] ConvertD24S8ToS8D24(ReadOnlySpan<byte> data)
 84          {
 85              byte[] output = new byte[data.Length];
 86  
 87              int start = 0;
 88  
 89              if (Avx2.IsSupported)
 90              {
 91                  var mask = Vector256.Create(
 92                      (byte)1, (byte)2, (byte)3, (byte)0,
 93                      (byte)5, (byte)6, (byte)7, (byte)4,
 94                      (byte)9, (byte)10, (byte)11, (byte)8,
 95                      (byte)13, (byte)14, (byte)15, (byte)12,
 96                      (byte)17, (byte)18, (byte)19, (byte)16,
 97                      (byte)21, (byte)22, (byte)23, (byte)20,
 98                      (byte)25, (byte)26, (byte)27, (byte)24,
 99                      (byte)29, (byte)30, (byte)31, (byte)28);
100  
101                  int sizeAligned = data.Length & ~31;
102  
103                  fixed (byte* pInput = data, pOutput = output)
104                  {
105                      for (uint i = 0; i < sizeAligned; i += 32)
106                      {
107                          var dataVec = Avx.LoadVector256(pInput + i);
108  
109                          dataVec = Avx2.Shuffle(dataVec, mask);
110  
111                          Avx.Store(pOutput + i, dataVec);
112                      }
113                  }
114  
115                  start = sizeAligned;
116              }
117              else if (Ssse3.IsSupported)
118              {
119                  var mask = Vector128.Create(
120                      (byte)1, (byte)2, (byte)3, (byte)0,
121                      (byte)5, (byte)6, (byte)7, (byte)4,
122                      (byte)9, (byte)10, (byte)11, (byte)8,
123                      (byte)13, (byte)14, (byte)15, (byte)12);
124  
125                  int sizeAligned = data.Length & ~15;
126  
127                  fixed (byte* pInput = data, pOutput = output)
128                  {
129                      for (uint i = 0; i < sizeAligned; i += 16)
130                      {
131                          var dataVec = Sse2.LoadVector128(pInput + i);
132  
133                          dataVec = Ssse3.Shuffle(dataVec, mask);
134  
135                          Sse2.Store(pOutput + i, dataVec);
136                      }
137                  }
138  
139                  start = sizeAligned;
140              }
141  
142              var outSpan = MemoryMarshal.Cast<byte, uint>(output);
143              var dataSpan = MemoryMarshal.Cast<byte, uint>(data);
144              for (int i = start / sizeof(uint); i < dataSpan.Length; i++)
145              {
146                  outSpan[i] = BitOperations.RotateRight(dataSpan[i], 8);
147              }
148  
149              return output;
150          }
151      }
152  }