/ src / Ryujinx.Graphics.Vic / Image / SurfaceReader.cs
SurfaceReader.cs
  1  using Ryujinx.Common.Logging;
  2  using Ryujinx.Common.Memory;
  3  using Ryujinx.Graphics.Texture;
  4  using Ryujinx.Graphics.Vic.Types;
  5  using System;
  6  using System.Runtime.CompilerServices;
  7  using System.Runtime.Intrinsics;
  8  using System.Runtime.Intrinsics.Arm;
  9  using System.Runtime.Intrinsics.X86;
 10  using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
 11  
 12  namespace Ryujinx.Graphics.Vic.Image
 13  {
 14      static class SurfaceReader
 15      {
 16          public static Surface Read(
 17              ResourceManager rm,
 18              ref SlotConfig config,
 19              ref SlotSurfaceConfig surfaceConfig,
 20              ref Array8<PlaneOffsets> offsets)
 21          {
 22              switch (surfaceConfig.SlotPixelFormat)
 23              {
 24                  case PixelFormat.Y8___V8U8_N420:
 25                      return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets);
 26              }
 27  
 28              Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\".");
 29  
 30              int lw = surfaceConfig.SlotLumaWidth + 1;
 31              int lh = surfaceConfig.SlotLumaHeight + 1;
 32  
 33              return new Surface(rm.SurfacePool, lw, lh);
 34          }
 35  
 36          private unsafe static Surface ReadNv12(
 37              ResourceManager rm,
 38              ref SlotConfig config,
 39              ref SlotSurfaceConfig surfaceConfig,
 40              ref Array8<PlaneOffsets> offsets)
 41          {
 42              InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2);
 43  
 44              int width = input.Width;
 45              int height = input.Height;
 46  
 47              int yStride = GetPitch(width, 1);
 48              int uvStride = GetPitch(input.UvWidth, 2);
 49  
 50              Surface output = new(rm.SurfacePool, width, height);
 51  
 52              if (Sse41.IsSupported)
 53              {
 54                  Vector128<byte> shufMask = Vector128.Create(
 55                      (byte)0, (byte)2, (byte)3, (byte)1,
 56                      (byte)4, (byte)6, (byte)7, (byte)5,
 57                      (byte)8, (byte)10, (byte)11, (byte)9,
 58                      (byte)12, (byte)14, (byte)15, (byte)13);
 59                  Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16();
 60  
 61                  int yStrideGap = yStride - width;
 62                  int uvStrideGap = uvStride - input.UvWidth;
 63  
 64                  int widthTrunc = width & ~0xf;
 65  
 66                  fixed (Pixel* dstPtr = output.Data)
 67                  {
 68                      Pixel* op = dstPtr;
 69  
 70                      fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
 71                      {
 72                          byte* i0p = src0Ptr;
 73  
 74                          for (int y = 0; y < height; y++)
 75                          {
 76                              byte* i1p = src1Ptr + (y >> 1) * uvStride;
 77  
 78                              int x = 0;
 79  
 80                              for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
 81                              {
 82                                  Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
 83                                  Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);
 84  
 85                                  Vector128<byte> uv = Sse2.LoadVector128(i1p);
 86  
 87                                  Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
 88                                  Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());
 89  
 90                                  Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
 91                                  Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
 92                                  Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
 93                                  Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);
 94  
 95                                  rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
 96                                  rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
 97                                  rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
 98                                  rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();
 99  
100                                  rgba0 = Sse2.Or(rgba0, alphaMask);
101                                  rgba1 = Sse2.Or(rgba1, alphaMask);
102                                  rgba2 = Sse2.Or(rgba2, alphaMask);
103                                  rgba3 = Sse2.Or(rgba3, alphaMask);
104  
105                                  Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
106                                  Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
107                                  Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
108                                  Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
109                                  Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
110                                  Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
111                                  Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
112                                  Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));
113  
114                                  rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
115                                  rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
116                                  rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
117                                  rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2);
118                                  rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2);
119                                  rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2);
120                                  rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2);
121                                  rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2);
122  
123                                  Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
124                                  Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
125                                  Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
126                                  Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
127                                  Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
128                                  Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
129                                  Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
130                                  Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
131                              }
132  
133                              for (; x < width; x++, i1p += (x & 1) * 2)
134                              {
135                                  Pixel* px = op + (uint)x;
136  
137                                  px->R = Upsample(*i0p++);
138                                  px->G = Upsample(*i1p);
139                                  px->B = Upsample(*(i1p + 1));
140                                  px->A = 0x3ff;
141                              }
142  
143                              op += width;
144                              i0p += yStrideGap;
145                              i1p += uvStrideGap;
146                          }
147                      }
148                  }
149              }
150              else if (AdvSimd.Arm64.IsSupported)
151              {
152                  Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32();
153  
154                  int yStrideGap = yStride - width;
155                  int uvStrideGap = uvStride - input.UvWidth;
156  
157                  int widthTrunc = width & ~0xf;
158  
159                  fixed (Pixel* dstPtr = output.Data)
160                  {
161                      Pixel* op = dstPtr;
162  
163                      fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
164                      {
165                          byte* i0p = src0Ptr;
166  
167                          for (int y = 0; y < height; y++)
168                          {
169                              byte* i1p = src1Ptr + (y >> 1) * uvStride;
170  
171                              int x = 0;
172  
173                              for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
174                              {
175                                  Vector128<byte> ya = AdvSimd.LoadVector128(i0p);
176                                  Vector128<byte> uv = AdvSimd.LoadVector128(i1p);
177  
178                                  Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16();
179                                  Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16();
180  
181                                  Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16());
182                                  Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16());
183  
184                                  ya0 = AdvSimd.ShiftLeftLogical(ya0, 8);
185                                  ya1 = AdvSimd.ShiftLeftLogical(ya1, 8);
186  
187                                  Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0);
188                                  Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0);
189                                  Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1);
190                                  Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1);
191  
192                                  rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16();
193                                  rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16();
194                                  rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16();
195                                  rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16();
196  
197                                  Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16();
198                                  Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16();
199                                  Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16();
200                                  Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16();
201                                  Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16();
202                                  Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16();
203                                  Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16();
204                                  Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16();
205  
206                                  rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2);
207                                  rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2);
208                                  rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2);
209                                  rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2);
210                                  rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2);
211                                  rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2);
212                                  rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2);
213                                  rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2);
214  
215                                  AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0);
216                                  AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1);
217                                  AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2);
218                                  AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3);
219                                  AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4);
220                                  AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5);
221                                  AdvSimd.Store((short*)(op + (uint)x + 12), rgba16_6);
222                                  AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7);
223                              }
224  
225                              for (; x < width; x++, i1p += (x & 1) * 2)
226                              {
227                                  Pixel* px = op + (uint)x;
228  
229                                  px->R = Upsample(*i0p++);
230                                  px->G = Upsample(*i1p);
231                                  px->B = Upsample(*(i1p + 1));
232                                  px->A = 0x3ff;
233                              }
234  
235                              op += width;
236                              i0p += yStrideGap;
237                              i1p += uvStrideGap;
238                          }
239                      }
240                  }
241              }
242              else
243              {
244                  for (int y = 0; y < height; y++)
245                  {
246                      int uvBase = (y >> 1) * uvStride;
247  
248                      for (int x = 0; x < width; x++)
249                      {
250                          output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));
251  
252                          int uvOffs = uvBase + (x & ~1);
253  
254                          output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
255                          output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
256                          output.SetA(x, y, 0x3ff);
257                      }
258                  }
259              }
260  
261              input.Return(rm.BufferPool);
262  
263              return output;
264          }
265  
266          [MethodImpl(MethodImplOptions.AggressiveInlining)]
267          private static Vector128<byte> HighToLow(Vector128<byte> value)
268          {
269              return Sse.MoveHighToLow(value.AsSingle(), value.AsSingle()).AsByte();
270          }
271  
272          private static InputSurface ReadSurface(
273              ResourceManager rm,
274              ref SlotConfig config,
275              ref SlotSurfaceConfig surfaceConfig,
276              ref Array8<PlaneOffsets> offsets,
277              int bytesPerPixel,
278              int planes)
279          {
280              InputSurface surface = new();
281  
282              surface.Initialize();
283  
284              int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight;
285  
286              bool linear = surfaceConfig.SlotBlkKind == 0;
287  
288              int lw = surfaceConfig.SlotLumaWidth + 1;
289              int lh = surfaceConfig.SlotLumaHeight + 1;
290  
291              int cw = surfaceConfig.SlotChromaWidth + 1;
292              int ch = surfaceConfig.SlotChromaHeight + 1;
293  
294              // Interlaced inputs have double the height when deinterlaced.
295              int heightShift = config.FrameFormat.IsField() ? 1 : 0;
296  
297              surface.Width = lw;
298              surface.Height = lh << heightShift;
299              surface.UvWidth = cw;
300              surface.UvHeight = ch << heightShift;
301  
302              if (planes > 0)
303              {
304                  surface.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lw, lh, bytesPerPixel, gobBlocksInY));
305              }
306  
307              if (planes > 1)
308              {
309                  surface.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY));
310              }
311  
312              if (planes > 2)
313              {
314                  surface.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, cw, ch, 1, gobBlocksInY));
315              }
316  
317              return surface;
318          }
319  
320          private static RentedBuffer ReadBuffer(
321              ResourceManager rm,
322              scoped ref SlotConfig config,
323              scoped ref Array8<PlaneOffsets> offsets,
324              bool linear,
325              int plane,
326              int width,
327              int height,
328              int bytesPerPixel,
329              int gobBlocksInY)
330          {
331              FrameFormat frameFormat = config.FrameFormat;
332              bool isLuma = plane == 0;
333              bool isField = frameFormat.IsField();
334              bool isTopField = frameFormat.IsTopField(isLuma);
335              int stride = GetPitch(width, bytesPerPixel);
336              uint offset = GetOffset(ref offsets[0], plane);
337  
338              int dstStart = 0;
339              int dstStride = stride;
340  
341              if (isField)
342              {
343                  dstStart = isTopField ? 0 : stride;
344                  dstStride = stride * 2;
345              }
346  
347              RentedBuffer buffer;
348  
349              if (linear)
350              {
351                  buffer = ReadBufferLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel);
352              }
353              else
354              {
355                  buffer = ReadBufferBlockLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel, gobBlocksInY);
356              }
357  
358              if (isField || frameFormat.IsInterlaced())
359              {
360                  RentedBuffer prevBuffer = RentedBuffer.Empty;
361                  RentedBuffer nextBuffer = RentedBuffer.Empty;
362  
363                  if (config.PrevFieldEnable)
364                  {
365                      prevBuffer = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
366                  }
367  
368                  if (config.NextFieldEnable)
369                  {
370                      nextBuffer = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
371                  }
372  
373                  int w = width * bytesPerPixel;
374  
375                  switch (config.DeinterlaceMode)
376                  {
377                      case DeinterlaceMode.Weave:
378                          Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
379                          break;
380                      case DeinterlaceMode.BobField:
381                          Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
382                          break;
383                      case DeinterlaceMode.Bob:
384                          bool isCurrentTop = isLuma ? config.IsEven : config.ChromaEven;
385                          Scaler.DeinterlaceBob(buffer.Data, w, stride, isCurrentTop ^ frameFormat.IsInterlacedBottomFirst());
386                          break;
387                      case DeinterlaceMode.NewBob:
388                      case DeinterlaceMode.Disi1:
389                          Scaler.DeinterlaceMotionAdaptive(buffer.Data, prevBuffer.Data, nextBuffer.Data, w, stride, isTopField);
390                          break;
391                      case DeinterlaceMode.WeaveLumaBobFieldChroma:
392                          if (isLuma)
393                          {
394                              Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
395                          }
396                          else
397                          {
398                              Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
399                          }
400                          break;
401                      default:
402                          Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\".");
403                          break;
404                  }
405  
406                  prevBuffer.Return(rm.BufferPool);
407                  nextBuffer.Return(rm.BufferPool);
408              }
409  
410              return buffer;
411          }
412  
413          private static uint GetOffset(ref PlaneOffsets offsets, int plane)
414          {
415              return plane switch
416              {
417                  0 => offsets.LumaOffset,
418                  1 => offsets.ChromaUOffset,
419                  2 => offsets.ChromaVOffset,
420                  _ => throw new ArgumentOutOfRangeException(nameof(plane)),
421              };
422          }
423  
424          private static RentedBuffer ReadBufferNoDeinterlace(
425              ResourceManager rm,
426              ref PlaneOffsets offsets,
427              bool linear,
428              int plane,
429              int width,
430              int height,
431              int bytesPerPixel,
432              int gobBlocksInY)
433          {
434              int stride = GetPitch(width, bytesPerPixel);
435              uint offset = GetOffset(ref offsets, plane);
436  
437              if (linear)
438              {
439                  return ReadBufferLinear(rm, offset, width, height, 0, stride, bytesPerPixel);
440              }
441  
442              return ReadBufferBlockLinear(rm, offset, width, height, 0, stride, bytesPerPixel, gobBlocksInY);
443          }
444  
445          private static RentedBuffer ReadBufferLinear(
446              ResourceManager rm,
447              uint offset,
448              int width,
449              int height,
450              int dstStart,
451              int dstStride,
452              int bytesPerPixel)
453          {
454              int srcStride = GetPitch(width, bytesPerPixel);
455              int inSize = srcStride * height;
456  
457              ReadOnlySpan<byte> src = rm.MemoryManager.GetSpan(ExtendOffset(offset), inSize);
458  
459              int outSize = dstStride * height;
460              int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);
461              Span<byte> dst = buffer;
462              dst = dst[..outSize];
463  
464              for (int y = 0; y < height; y++)
465              {
466                  src.Slice(y * srcStride, srcStride).CopyTo(dst.Slice(dstStart + y * dstStride, srcStride));
467              }
468  
469              return new RentedBuffer(dst, bufferIndex);
470          }
471  
472          private static RentedBuffer ReadBufferBlockLinear(
473              ResourceManager rm,
474              uint offset,
475              int width,
476              int height,
477              int dstStart,
478              int dstStride,
479              int bytesPerPixel,
480              int gobBlocksInY)
481          {
482              int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);
483  
484              ReadOnlySpan<byte> src = rm.MemoryManager.GetSpan(ExtendOffset(offset), inSize);
485  
486              int outSize = dstStride * height;
487              int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);
488              Span<byte> dst = buffer;
489              dst = dst[..outSize];
490  
491              LayoutConverter.ConvertBlockLinearToLinear(dst[dstStart..], width, height, dstStride, bytesPerPixel, gobBlocksInY, src);
492  
493              return new RentedBuffer(dst, bufferIndex);
494          }
495      }
496  }