SurfaceReader.cs
using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Texture;
using Ryujinx.Graphics.Vic.Types;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;

namespace Ryujinx.Graphics.Vic.Image
{
    /// <summary>
    /// Reads VIC input slot surfaces from memory and converts them into <see cref="Surface"/> instances.
    /// </summary>
    static class SurfaceReader
    {
        /// <summary>
        /// Reads the input surface described by a slot configuration.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the surface/buffer pools</param>
        /// <param name="config">Slot configuration (frame format, deinterlace settings)</param>
        /// <param name="surfaceConfig">Slot surface configuration (pixel format, dimensions, block layout)</param>
        /// <param name="offsets">Plane offsets of the current, previous and next surfaces</param>
        /// <returns>
        /// The converted surface. For an unsupported pixel format an error is logged and a newly created
        /// surface with the luma dimensions is returned without any pixel data being read into it.
        /// </returns>
        public static Surface Read(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets)
        {
            switch (surfaceConfig.SlotPixelFormat)
            {
                case PixelFormat.Y8___V8U8_N420:
                    return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets);
            }

            Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\".");

            // The configuration stores dimensions minus one.
            int lw = surfaceConfig.SlotLumaWidth + 1;
            int lh = surfaceConfig.SlotLumaHeight + 1;

            return new Surface(rm.SurfacePool, lw, lh);
        }

        /// <summary>
        /// Reads a two-plane surface (one byte per luma sample, two interleaved bytes per chroma sample)
        /// and expands it into a <see cref="Surface"/>.
        /// </summary>
        /// <remarks>
        /// For every output pixel, R receives the luma sample, G and B receive the first and second byte of the
        /// chroma pair shared by the two horizontally adjacent pixels, and chroma rows are shared by two luma rows.
        /// Rows are processed 16 pixels at a time with SSE4.1 or AdvSimd when available, the remaining
        /// columns are handled by <see cref="ReadNv12Scalar"/>. Without SIMD support a fully managed loop is used.
        /// NOTE(review): the vector loops produce alpha 0xff &lt;&lt; 2 (0x3fc) while the scalar code writes 0x3ff.
        /// </remarks>
        /// <param name="rm">Resource manager providing memory access and the surface/buffer pools</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="surfaceConfig">Slot surface configuration</param>
        /// <param name="offsets">Plane offsets of the current, previous and next surfaces</param>
        /// <returns>The converted surface</returns>
        private static unsafe Surface ReadNv12(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets)
        {
            InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2);

            int width = input.Width;
            int height = input.Height;

            int yStride = GetPitch(width, 1);
            int uvStride = GetPitch(input.UvWidth, 2);

            Surface output = new(rm.SurfacePool, width, height);

            if (Sse41.IsSupported)
            {
                // Reorders each 32-bit group from (Y, 0, C0, C1) to (Y, C0, C1, 0).
                Vector128<byte> shufMask = Vector128.Create(
                    (byte)0, (byte)2, (byte)3, (byte)1,
                    (byte)4, (byte)6, (byte)7, (byte)5,
                    (byte)8, (byte)10, (byte)11, (byte)9,
                    (byte)12, (byte)14, (byte)15, (byte)13);
                Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16();

                // Number of columns that can be processed in whole groups of 16.
                int widthTrunc = width & ~0xf;

                fixed (Pixel* dstPtr = output.Data)
                {
                    fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
                    {
                        Pixel* op = dstPtr;
                        byte* i0Row = src0Ptr;

                        for (int y = 0; y < height; y++)
                        {
                            // Two luma rows share one chroma row.
                            byte* i1Row = src1Ptr + (y >> 1) * uvStride;

                            byte* i0p = i0Row;
                            byte* i1p = i1Row;

                            int x = 0;

                            for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                            {
                                // 16 luma samples, zero extended to 16 bits.
                                Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p);
                                Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8);

                                // 8 chroma pairs, each duplicated so that two pixels share one pair.
                                Vector128<byte> uv = Sse2.LoadVector128(i1p);

                                Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16());
                                Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16());

                                Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0);
                                Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0);
                                Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1);
                                Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1);

                                rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16();
                                rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16();
                                rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
                                rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();

                                rgba0 = Sse2.Or(rgba0, alphaMask);
                                rgba1 = Sse2.Or(rgba1, alphaMask);
                                rgba2 = Sse2.Or(rgba2, alphaMask);
                                rgba3 = Sse2.Or(rgba3, alphaMask);

                                // Widen every 8-bit component to 16 bits (two pixels per vector).
                                Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
                                Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
                                Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
                                Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte()));
                                Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte());
                                Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte()));
                                Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
                                Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));

                                rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
                                rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
                                rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
                                rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2);
                                rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2);
                                rgba16_5 = Sse2.ShiftLeftLogical(rgba16_5, 2);
                                rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2);
                                rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2);

                                Sse2.Store((short*)(op + (uint)x + 0), rgba16_0);
                                Sse2.Store((short*)(op + (uint)x + 2), rgba16_1);
                                Sse2.Store((short*)(op + (uint)x + 4), rgba16_2);
                                Sse2.Store((short*)(op + (uint)x + 6), rgba16_3);
                                Sse2.Store((short*)(op + (uint)x + 8), rgba16_4);
                                Sse2.Store((short*)(op + (uint)x + 10), rgba16_5);
                                Sse2.Store((short*)(op + (uint)x + 12), rgba16_6);
                                Sse2.Store((short*)(op + (uint)x + 14), rgba16_7);
                            }

                            // Remaining columns that do not fill a whole group of 16.
                            ReadNv12Scalar(op, i0Row, i1Row, x, width);

                            op += width;
                            i0Row += yStride;
                        }
                    }
                }
            }
            else if (AdvSimd.Arm64.IsSupported)
            {
                Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32();

                // Number of columns that can be processed in whole groups of 16.
                int widthTrunc = width & ~0xf;

                fixed (Pixel* dstPtr = output.Data)
                {
                    fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
                    {
                        Pixel* op = dstPtr;
                        byte* i0Row = src0Ptr;

                        for (int y = 0; y < height; y++)
                        {
                            // Two luma rows share one chroma row.
                            byte* i1Row = src1Ptr + (y >> 1) * uvStride;

                            byte* i0p = i0Row;
                            byte* i1p = i1Row;

                            int x = 0;

                            for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
                            {
                                Vector128<byte> ya = AdvSimd.LoadVector128(i0p);
                                Vector128<byte> uv = AdvSimd.LoadVector128(i1p);

                                Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16();
                                Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16();

                                // Each chroma pair is duplicated so that two pixels share one pair.
                                Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16());
                                Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16());

                                // Move luma to the high byte, the 32-bit right shift below brings
                                // every group to the (Y, C0, C1, 0) byte order before alpha is added.
                                ya0 = AdvSimd.ShiftLeftLogical(ya0, 8);
                                ya1 = AdvSimd.ShiftLeftLogical(ya1, 8);

                                Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0);
                                Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0);
                                Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1);
                                Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1);

                                rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16();
                                rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16();
                                rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16();
                                rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16();

                                // Widen every 8-bit component to 16 bits (two pixels per vector).
                                Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16();
                                Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16();
                                Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16();
                                Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16();
                                Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16();

                                rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2);
                                rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2);
                                rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2);
                                rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2);
                                rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2);
                                rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2);
                                rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2);
                                rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2);

                                AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0);
                                AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1);
                                AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2);
                                AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3);
                                AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4);
                                AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5);
                                AdvSimd.Store((short*)(op + (uint)x + 12), rgba16_6);
                                AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7);
                            }

                            // Remaining columns that do not fill a whole group of 16.
                            ReadNv12Scalar(op, i0Row, i1Row, x, width);

                            op += width;
                            i0Row += yStride;
                        }
                    }
                }
            }
            else
            {
                for (int y = 0; y < height; y++)
                {
                    int uvBase = (y >> 1) * uvStride;

                    for (int x = 0; x < width; x++)
                    {
                        output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x]));

                        // Both pixels of a horizontal pair use the same chroma bytes.
                        int uvOffs = uvBase + (x & ~1);

                        output.SetG(x, y, Upsample(input.Buffer1[uvOffs]));
                        output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1]));
                        output.SetA(x, y, 0x3ff);
                    }
                }
            }

            input.Return(rm.BufferPool);

            return output;
        }

        /// <summary>
        /// Converts the columns [<paramref name="startX"/>, <paramref name="width"/>) of one row without SIMD.
        /// </summary>
        /// <remarks>
        /// The chroma offset is the column rounded down to an even number, so that both pixels of a
        /// horizontal pair read the same chroma bytes. This matches the vectorized loops and the managed fallback.
        /// </remarks>
        /// <param name="dstRow">Pointer to the first pixel of the destination row</param>
        /// <param name="lumaRow">Pointer to the first luma byte of the source row</param>
        /// <param name="chromaRow">Pointer to the first byte of the interleaved chroma row</param>
        /// <param name="startX">First column to convert</param>
        /// <param name="width">Row width in pixels (exclusive end column)</param>
        private static unsafe void ReadNv12Scalar(Pixel* dstRow, byte* lumaRow, byte* chromaRow, int startX, int width)
        {
            for (int x = startX; x < width; x++)
            {
                Pixel* px = dstRow + (uint)x;
                byte* chroma = chromaRow + (uint)(x & ~1);

                px->R = Upsample(lumaRow[x]);
                px->G = Upsample(chroma[0]);
                px->B = Upsample(chroma[1]);
                px->A = 0x3ff;
            }
        }

        /// <summary>
        /// Copies the upper 64 bits of a vector into its lower 64 bits (the upper half is left unchanged).
        /// </summary>
        /// <param name="value">Input vector</param>
        /// <returns>Vector whose lower and upper halves both hold the upper half of the input</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector128<byte> HighToLow(Vector128<byte> value)
        {
            return Sse.MoveHighToLow(value.AsSingle(), value.AsSingle()).AsByte();
        }

        /// <summary>
        /// Reads up to three planes of an input surface into rented buffers.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the buffer pool</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="surfaceConfig">Slot surface configuration</param>
        /// <param name="offsets">Plane offsets of the current, previous and next surfaces</param>
        /// <param name="bytesPerPixel">Bytes per sample of the first plane</param>
        /// <param name="planes">Number of planes to read (buffers beyond this count are not set)</param>
        /// <returns>Input surface with its dimensions and plane buffers populated</returns>
        private static InputSurface ReadSurface(
            ResourceManager rm,
            ref SlotConfig config,
            ref SlotSurfaceConfig surfaceConfig,
            ref Array8<PlaneOffsets> offsets,
            int bytesPerPixel,
            int planes)
        {
            InputSurface surface = new();

            surface.Initialize();

            int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight;

            // A block kind of zero selects the linear (pitch) reader.
            bool linear = surfaceConfig.SlotBlkKind == 0;

            // The configuration stores dimensions minus one.
            int lw = surfaceConfig.SlotLumaWidth + 1;
            int lh = surfaceConfig.SlotLumaHeight + 1;

            int cw = surfaceConfig.SlotChromaWidth + 1;
            int ch = surfaceConfig.SlotChromaHeight + 1;

            // Interlaced inputs have double the height when deinterlaced.
            int heightShift = config.FrameFormat.IsField() ? 1 : 0;

            surface.Width = lw;
            surface.Height = lh << heightShift;
            surface.UvWidth = cw;
            surface.UvHeight = ch << heightShift;

            if (planes > 0)
            {
                surface.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lw, lh, bytesPerPixel, gobBlocksInY));
            }

            if (planes > 1)
            {
                // With exactly two planes the second one holds two bytes per sample.
                surface.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY));
            }

            if (planes > 2)
            {
                surface.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, cw, ch, 1, gobBlocksInY));
            }

            return surface;
        }

        /// <summary>
        /// Reads one plane of the current surface and, for field or interlaced frame formats,
        /// applies the configured deinterlace mode to it.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the buffer pool</param>
        /// <param name="config">Slot configuration</param>
        /// <param name="offsets">Plane offsets; index 0 is the current surface, 1 the previous and 2 the next</param>
        /// <param name="linear">True to read with the linear reader, false for block linear</param>
        /// <param name="plane">Plane index (0 is treated as luma)</param>
        /// <param name="width">Plane width in samples</param>
        /// <param name="height">Plane height in rows, as stored in memory</param>
        /// <param name="bytesPerPixel">Bytes per sample</param>
        /// <param name="gobBlocksInY">Block height in GOBs for block linear input</param>
        /// <returns>Rented buffer with the plane data</returns>
        private static RentedBuffer ReadBuffer(
            ResourceManager rm,
            scoped ref SlotConfig config,
            scoped ref Array8<PlaneOffsets> offsets,
            bool linear,
            int plane,
            int width,
            int height,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            FrameFormat frameFormat = config.FrameFormat;
            bool isLuma = plane == 0;
            bool isField = frameFormat.IsField();
            bool isTopField = frameFormat.IsTopField(isLuma);
            int stride = GetPitch(width, bytesPerPixel);
            uint offset = GetOffset(ref offsets[0], plane);

            int dstStart = 0;
            int dstStride = stride;

            if (isField)
            {
                // A field is written to every other row of the output:
                // top fields start at row 0, bottom fields at row 1.
                dstStart = isTopField ? 0 : stride;
                dstStride = stride * 2;
            }

            RentedBuffer buffer;

            if (linear)
            {
                buffer = ReadBufferLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel);
            }
            else
            {
                buffer = ReadBufferBlockLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel, gobBlocksInY);
            }

            if (isField || frameFormat.IsInterlaced())
            {
                RentedBuffer prevBuffer = RentedBuffer.Empty;
                RentedBuffer nextBuffer = RentedBuffer.Empty;

                if (config.PrevFieldEnable)
                {
                    prevBuffer = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
                }

                if (config.NextFieldEnable)
                {
                    nextBuffer = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY);
                }

                // Row width in bytes.
                int w = width * bytesPerPixel;

                switch (config.DeinterlaceMode)
                {
                    case DeinterlaceMode.Weave:
                        Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.BobField:
                        Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.Bob:
                        bool isCurrentTop = isLuma ? config.IsEven : config.ChromaEven;
                        Scaler.DeinterlaceBob(buffer.Data, w, stride, isCurrentTop ^ frameFormat.IsInterlacedBottomFirst());
                        break;
                    case DeinterlaceMode.NewBob:
                    case DeinterlaceMode.Disi1:
                        Scaler.DeinterlaceMotionAdaptive(buffer.Data, prevBuffer.Data, nextBuffer.Data, w, stride, isTopField);
                        break;
                    case DeinterlaceMode.WeaveLumaBobFieldChroma:
                        if (isLuma)
                        {
                            Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField);
                        }
                        else
                        {
                            Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField);
                        }
                        break;
                    default:
                        Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\".");
                        break;
                }

                // The neighbouring fields are only needed while deinterlacing.
                prevBuffer.Return(rm.BufferPool);
                nextBuffer.Return(rm.BufferPool);
            }

            return buffer;
        }

        /// <summary>
        /// Selects the offset of a plane from a set of plane offsets.
        /// </summary>
        /// <param name="offsets">Plane offsets of one surface</param>
        /// <param name="plane">Plane index: 0 for luma, 1 for chroma U, 2 for chroma V</param>
        /// <returns>Offset of the requested plane</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="plane"/> is not 0, 1 or 2</exception>
        private static uint GetOffset(ref PlaneOffsets offsets, int plane)
        {
            return plane switch
            {
                0 => offsets.LumaOffset,
                1 => offsets.ChromaUOffset,
                2 => offsets.ChromaVOffset,
                _ => throw new ArgumentOutOfRangeException(nameof(plane)),
            };
        }

        /// <summary>
        /// Reads one plane into a tightly packed buffer (rows are <c>GetPitch(width, bytesPerPixel)</c> bytes apart),
        /// without leaving room for a second field.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the buffer pool</param>
        /// <param name="offsets">Plane offsets of the surface to read</param>
        /// <param name="linear">True to read with the linear reader, false for block linear</param>
        /// <param name="plane">Plane index</param>
        /// <param name="width">Plane width in samples</param>
        /// <param name="height">Plane height in rows</param>
        /// <param name="bytesPerPixel">Bytes per sample</param>
        /// <param name="gobBlocksInY">Block height in GOBs for block linear input</param>
        /// <returns>Rented buffer with the plane data</returns>
        private static RentedBuffer ReadBufferNoDeinterlace(
            ResourceManager rm,
            ref PlaneOffsets offsets,
            bool linear,
            int plane,
            int width,
            int height,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            int stride = GetPitch(width, bytesPerPixel);
            uint offset = GetOffset(ref offsets, plane);

            if (linear)
            {
                return ReadBufferLinear(rm, offset, width, height, 0, stride, bytesPerPixel);
            }

            return ReadBufferBlockLinear(rm, offset, width, height, 0, stride, bytesPerPixel, gobBlocksInY);
        }

        /// <summary>
        /// Reads a linear plane, copying it row by row into a rented buffer.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the buffer pool</param>
        /// <param name="offset">Plane offset, expanded with <c>ExtendOffset</c> before the memory read</param>
        /// <param name="width">Plane width in samples</param>
        /// <param name="height">Number of rows to read</param>
        /// <param name="dstStart">Byte offset of the first row inside the output buffer</param>
        /// <param name="dstStride">Distance in bytes between consecutive output rows</param>
        /// <param name="bytesPerPixel">Bytes per sample</param>
        /// <returns>Rented buffer of <c>dstStride * height</c> bytes holding the copied rows</returns>
        private static RentedBuffer ReadBufferLinear(
            ResourceManager rm,
            uint offset,
            int width,
            int height,
            int dstStart,
            int dstStride,
            int bytesPerPixel)
        {
            int srcStride = GetPitch(width, bytesPerPixel);
            int inSize = srcStride * height;

            ReadOnlySpan<byte> src = rm.MemoryManager.GetSpan(ExtendOffset(offset), inSize);

            int outSize = dstStride * height;
            int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);
            Span<byte> dst = buffer;

            // The rented array is only guaranteed to have at least the requested size.
            dst = dst[..outSize];

            for (int y = 0; y < height; y++)
            {
                src.Slice(y * srcStride, srcStride).CopyTo(dst.Slice(dstStart + y * dstStride, srcStride));
            }

            return new RentedBuffer(dst, bufferIndex);
        }

        /// <summary>
        /// Reads a block linear plane, converting it to a linear layout inside a rented buffer.
        /// </summary>
        /// <param name="rm">Resource manager providing memory access and the buffer pool</param>
        /// <param name="offset">Plane offset, expanded with <c>ExtendOffset</c> before the memory read</param>
        /// <param name="width">Plane width in samples</param>
        /// <param name="height">Number of rows to read</param>
        /// <param name="dstStart">Byte offset of the first row inside the output buffer</param>
        /// <param name="dstStride">Distance in bytes between consecutive output rows</param>
        /// <param name="bytesPerPixel">Bytes per sample</param>
        /// <param name="gobBlocksInY">Block height in GOBs</param>
        /// <returns>Rented buffer of <c>dstStride * height</c> bytes holding the converted rows</returns>
        private static RentedBuffer ReadBufferBlockLinear(
            ResourceManager rm,
            uint offset,
            int width,
            int height,
            int dstStart,
            int dstStride,
            int bytesPerPixel,
            int gobBlocksInY)
        {
            int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY);

            ReadOnlySpan<byte> src = rm.MemoryManager.GetSpan(ExtendOffset(offset), inSize);

            int outSize = dstStride * height;
            int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer);
            Span<byte> dst = buffer;

            // The rented array is only guaranteed to have at least the requested size.
            dst = dst[..outSize];

            LayoutConverter.ConvertBlockLinearToLinear(dst[dstStart..], width, height, dstStride, bytesPerPixel, gobBlocksInY, src);

            return new RentedBuffer(dst, bufferIndex);
        }
    }
}