Idct.cs
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Entry points for the inverse transforms that add their result to a destination block.
    /// The 1-D kernels and the fixed-size 2-D kernels are imported statically from <c>InvTxfm</c>;
    /// this class only selects a kernel and, for the hybrid transforms, runs the separable
    /// row pass followed by the column pass.
    /// </summary>
    internal static class Idct
    {
        /// <summary>1-D inverse transform reading from <paramref name="input"/> and writing to <paramref name="output"/>.</summary>
        private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);

        /// <summary>
        /// 1-D inverse transform for the <c>Highbd</c> code path; <paramref name="bd"/> is forwarded
        /// unchanged from the caller (presumably the sample bit depth - confirm against InvTxfm).
        /// </summary>
        private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);

        /// <summary>Pair of 1-D transforms applied to the columns (vertical) and rows (horizontal) of a block.</summary>
        private readonly struct Transform2D
        {
            public readonly Transform1D Cols; // Vertical
            public readonly Transform1D Rows; // Horizontal

            public Transform2D(Transform1D cols, Transform1D rows)
            {
                Cols = cols;
                Rows = rows;
            }
        }

        /// <summary>Highbd counterpart of <see cref="Transform2D"/>.</summary>
        private readonly struct HighbdTransform2D
        {
            public readonly HighbdTransform1D Cols; // Vertical
            public readonly HighbdTransform1D Rows; // Horizontal

            public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
            {
                Cols = cols;
                Rows = rows;
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly Transform2D[] _iht4 = {
            new(Idct4, Idct4), // DCT_DCT = 0
            new(Iadst4, Idct4), // ADST_DCT = 1
            new(Idct4, Iadst4), // DCT_ADST = 2
            new(Iadst4, Iadst4), // ADST_ADST = 3
        };

        /// <summary>
        /// Runs the 4x4 inverse hybrid transform selected by <paramref name="txType"/> and adds the
        /// result, rounded by 4 bits, to <paramref name="dest"/> through <c>ClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 4 consecutive rows of 4 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            const int Size = 4;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            Transform2D ht = _iht4[txType];

            // Inverse transform row vectors
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..]);
            }

            // Inverse transform column vectors
            for (int col = 0; col < Size; ++col)
            {
                // Gather one column of the intermediate block so the 1-D transform sees contiguous data.
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4));
                }
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly Transform2D[] _iht8 = {
            new(Idct8, Idct8), // DCT_DCT = 0
            new(Iadst8, Idct8), // ADST_DCT = 1
            new(Idct8, Iadst8), // DCT_ADST = 2
            new(Iadst8, Iadst8), // ADST_ADST = 3
        };

        /// <summary>
        /// Runs the 8x8 inverse hybrid transform selected by <paramref name="txType"/> and adds the
        /// result, rounded by 5 bits, to <paramref name="dest"/> through <c>ClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 8 consecutive rows of 8 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            const int Size = 8;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            Transform2D ht = _iht8[txType];

            // Inverse transform row vectors
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..]);
            }

            // Inverse transform column vectors
            for (int col = 0; col < Size; ++col)
            {
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 5));
                }
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly Transform2D[] _iht16 = {
            new(Idct16, Idct16), // DCT_DCT = 0
            new(Iadst16, Idct16), // ADST_DCT = 1
            new(Idct16, Iadst16), // DCT_ADST = 2
            new(Iadst16, Iadst16), // ADST_ADST = 3
        };

        /// <summary>
        /// Runs the 16x16 inverse hybrid transform selected by <paramref name="txType"/> and adds the
        /// result, rounded by 6 bits, to <paramref name="dest"/> through <c>ClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 16 consecutive rows of 16 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
        {
            const int Size = 16;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            Transform2D ht = _iht16[txType];

            // Rows
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..]);
            }

            // Columns
            for (int col = 0; col < Size; ++col)
            {
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = ClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 6));
                }
            }
        }

        // Idct

        /// <summary>
        /// 4x4 inverse DCT + add. Uses the full kernel when <paramref name="eob"/> is greater than 1
        /// and the single-coefficient kernel otherwise.
        /// </summary>
        public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob > 1)
            {
                Idct4x416Add(input, dest, stride);
            }
            else
            {
                Idct4x41Add(input, dest, stride);
            }
        }

        /// <summary>
        /// 4x4 inverse WHT + add. Uses the full kernel when <paramref name="eob"/> is greater than 1
        /// and the single-coefficient kernel otherwise.
        /// </summary>
        public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob > 1)
            {
                Iwht4x416Add(input, dest, stride);
            }
            else
            {
                Iwht4x41Add(input, dest, stride);
            }
        }

        /// <summary>8x8 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            // If dc is 1, then input[0] is the reconstructed value, do not need
            // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to decide what to do.
            if (eob == 1)
            {
                // DC only DCT coefficient
                Idct8x81Add(input, dest, stride);
            }
            else if (eob <= 12)
            {
                Idct8x812Add(input, dest, stride);
            }
            else
            {
                Idct8x864Add(input, dest, stride);
            }
        }

        /// <summary>16x16 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to separate different cases.
            if (eob == 1)
            {
                // DC only DCT coefficient.
                Idct16x161Add(input, dest, stride);
            }
            else if (eob <= 10)
            {
                Idct16x1610Add(input, dest, stride);
            }
            else if (eob <= 38)
            {
                Idct16x1638Add(input, dest, stride);
            }
            else
            {
                Idct16x16256Add(input, dest, stride);
            }
        }

        /// <summary>32x32 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (eob == 1)
            {
                Idct32x321Add(input, dest, stride);
            }
            else if (eob <= 34)
            {
                // Non-zero coeff only in upper-left 8x8
                Idct32x3234Add(input, dest, stride);
            }
            else if (eob <= 135)
            {
                // Non-zero coeff only in upper-left 16x16
                Idct32x32135Add(input, dest, stride);
            }
            else
            {
                Idct32x321024Add(input, dest, stride);
            }
        }

        // Iht

        /// <summary>
        /// 4x4 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="Idct4x4Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct4x4Add(input, dest, stride, eob);
            }
            else
            {
                Iht4x416Add(input, dest, stride, (int)txType);
            }
        }

        /// <summary>
        /// 8x8 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="Idct8x8Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct8x8Add(input, dest, stride, eob);
            }
            else
            {
                Iht8x864Add(input, dest, stride, (int)txType);
            }
        }

        /// <summary>
        /// 16x16 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="Idct16x16Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
            int stride, int eob)
        {
            if (txType == TxType.DctDct)
            {
                Idct16x16Add(input, dest, stride, eob);
            }
            else
            {
                Iht16x16256Add(input, dest, stride, (int)txType);
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly HighbdTransform2D[] _highbdIht4 = {
            new(HighbdIdct4, HighbdIdct4), // DCT_DCT = 0
            new(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1
            new(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2
            new(HighbdIadst4, HighbdIadst4), // ADST_ADST = 3
        };

        /// <summary>
        /// Highbd 4x4 inverse hybrid transform; adds the result, rounded by 4 bits, to
        /// <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 4 consecutive rows of 4 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        /// <param name="bd">Forwarded to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
        public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            const int Size = 4;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            HighbdTransform2D ht = _highbdIht4[txType];

            // Inverse transform row vectors.
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..], bd);
            }

            // Inverse transform column vectors.
            for (int col = 0; col < Size; ++col)
            {
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut, bd);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 4), bd);
                }
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly HighbdTransform2D[] _highbdIht8 = {
            new(HighbdIdct8, HighbdIdct8), // DCT_DCT = 0
            new(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1
            new(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2
            new(HighbdIadst8, HighbdIadst8), // ADST_ADST = 3
        };

        /// <summary>
        /// Highbd 8x8 inverse hybrid transform; adds the result, rounded by 5 bits, to
        /// <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 8 consecutive rows of 8 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        /// <param name="bd">Forwarded to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
        public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            const int Size = 8;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            HighbdTransform2D ht = _highbdIht8[txType];

            // Inverse transform row vectors.
            // Each row must land in its own slot of the intermediate block: slicing by row index
            // (rather than re-slicing from the start of the buffer) keeps rows 2..7 from all
            // overwriting row 1.
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..], bd);
            }

            // Inverse transform column vectors.
            for (int col = 0; col < Size; ++col)
            {
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut, bd);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 5), bd);
                }
            }
        }

        // Indexed by transform type; each entry is (column transform, row transform).
        private static readonly HighbdTransform2D[] _highbdIht16 = {
            new(HighbdIdct16, HighbdIdct16), // DCT_DCT = 0
            new(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
            new(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
            new(HighbdIadst16, HighbdIadst16), // ADST_ADST = 3
        };

        /// <summary>
        /// Highbd 16x16 inverse hybrid transform; adds the result, rounded by 6 bits, to
        /// <paramref name="dest"/> through <c>HighbdClipPixelAdd</c>.
        /// </summary>
        /// <param name="input">Coefficients, read as 16 consecutive rows of 16 values.</param>
        /// <param name="dest">Destination block, updated in place.</param>
        /// <param name="stride">Distance in elements between two rows of <paramref name="dest"/>.</param>
        /// <param name="txType">Index into the transform table (0 = DCT_DCT .. 3 = ADST_ADST).</param>
        /// <param name="bd">Forwarded to the 1-D transforms and to <c>HighbdClipPixelAdd</c>.</param>
        public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
        {
            const int Size = 16;

            Span<int> output = stackalloc int[Size * Size];
            Span<int> tempIn = stackalloc int[Size];
            Span<int> tempOut = stackalloc int[Size];
            HighbdTransform2D ht = _highbdIht16[txType];

            // Rows
            // Each row must land in its own slot of the intermediate block: slicing by row index
            // (rather than re-slicing from the start of the buffer) keeps rows 2..15 from all
            // overwriting row 1.
            for (int row = 0; row < Size; ++row)
            {
                ht.Rows(input[(row * Size)..], output[(row * Size)..], bd);
            }

            // Columns
            for (int col = 0; col < Size; ++col)
            {
                for (int row = 0; row < Size; ++row)
                {
                    tempIn[row] = output[row * Size + col];
                }

                ht.Cols(tempIn, tempOut, bd);

                for (int row = 0; row < Size; ++row)
                {
                    int pos = row * stride + col;
                    dest[pos] = HighbdClipPixelAdd(dest[pos], BitUtils.RoundPowerOfTwo(tempOut[row], 6), bd);
                }
            }
        }

        // Idct

        /// <summary>
        /// Highbd 4x4 inverse DCT + add. Uses the full kernel when <paramref name="eob"/> is greater
        /// than 1 and the single-coefficient kernel otherwise.
        /// </summary>
        public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob > 1)
            {
                HighbdIdct4x416Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct4x41Add(input, dest, stride, bd);
            }
        }

        /// <summary>
        /// Highbd 4x4 inverse WHT + add. Uses the full kernel when <paramref name="eob"/> is greater
        /// than 1 and the single-coefficient kernel otherwise.
        /// </summary>
        public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob > 1)
            {
                HighbdIwht4x416Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIwht4x41Add(input, dest, stride, bd);
            }
        }

        /// <summary>Highbd 8x8 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            // If dc is 1, then input[0] is the reconstructed value, do not need
            // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to decide what to do.
            if (eob == 1)
            {
                // DC only DCT coefficient
                Vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
            }
            else if (eob <= 12)
            {
                HighbdIdct8x812Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct8x864Add(input, dest, stride, bd);
            }
        }

        /// <summary>Highbd 16x16 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            // The calculation can be simplified if there are not many non-zero dct
            // coefficients. Use eobs to separate different cases.
            if (eob == 1)
            {
                // DC only DCT coefficient.
                HighbdIdct16x161Add(input, dest, stride, bd);
            }
            else if (eob <= 10)
            {
                HighbdIdct16x1610Add(input, dest, stride, bd);
            }
            else if (eob <= 38)
            {
                HighbdIdct16x1638Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct16x16256Add(input, dest, stride, bd);
            }
        }

        /// <summary>Highbd 32x32 inverse DCT + add; <paramref name="eob"/> selects a reduced kernel when few coefficients are present.</summary>
        public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (eob == 1)
            {
                HighbdIdct32x321Add(input, dest, stride, bd);
            }
            else if (eob <= 34)
            {
                // Non-zero coeff only in upper-left 8x8
                HighbdIdct32x3234Add(input, dest, stride, bd);
            }
            else if (eob <= 135)
            {
                HighbdIdct32x32135Add(input, dest, stride, bd);
            }
            else
            {
                HighbdIdct32x321024Add(input, dest, stride, bd);
            }
        }

        // Iht

        /// <summary>
        /// Highbd 4x4 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="HighbdIdct4x4Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct4x4Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
            }
        }

        /// <summary>
        /// Highbd 8x8 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="HighbdIdct8x8Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct8x8Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
            }
        }

        /// <summary>
        /// Highbd 16x16 inverse transform + add. <see cref="TxType.DctDct"/> goes through
        /// <see cref="HighbdIdct16x16Add"/>; every other type uses the hybrid transform.
        /// </summary>
        public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
        {
            if (txType == TxType.DctDct)
            {
                HighbdIdct16x16Add(input, dest, stride, eob, bd);
            }
            else
            {
                HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
            }
        }
    }
}