Idct.cs
  1  using Ryujinx.Graphics.Nvdec.Vp9.Common;
  2  using Ryujinx.Graphics.Nvdec.Vp9.Types;
  3  using System;
  4  using static Ryujinx.Graphics.Nvdec.Vp9.Dsp.InvTxfm;
  5  
  6  namespace Ryujinx.Graphics.Nvdec.Vp9
  7  {
  8      internal static class Idct
  9      {
 10          private delegate void Transform1D(ReadOnlySpan<int> input, Span<int> output);
 11          private delegate void HighbdTransform1D(ReadOnlySpan<int> input, Span<int> output, int bd);
 12  
 13          private struct Transform2D
 14          {
 15              public Transform1D Cols, Rows; // Vertical and horizontal
 16  
 17              public Transform2D(Transform1D cols, Transform1D rows)
 18              {
 19                  Cols = cols;
 20                  Rows = rows;
 21              }
 22          }
 23  
 24          private struct HighbdTransform2D
 25          {
 26              public HighbdTransform1D Cols, Rows; // Vertical and horizontal
 27  
 28              public HighbdTransform2D(HighbdTransform1D cols, HighbdTransform1D rows)
 29              {
 30                  Cols = cols;
 31                  Rows = rows;
 32              }
 33          }
 34  
 35          private static readonly Transform2D[] _iht4 = {
 36              new(Idct4, Idct4), // DCT_DCT  = 0
 37              new(Iadst4, Idct4), // ADST_DCT = 1
 38              new(Idct4, Iadst4), // DCT_ADST = 2
 39              new(Iadst4, Iadst4), // ADST_ADST = 3
 40          };
 41  
 42          public static void Iht4x416Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
 43          {
 44              int i, j;
 45              Span<int> output = stackalloc int[4 * 4];
 46              Span<int> outptr = output;
 47              Span<int> tempIn = stackalloc int[4];
 48              Span<int> tempOut = stackalloc int[4];
 49  
 50              // Inverse transform row vectors
 51              for (i = 0; i < 4; ++i)
 52              {
 53                  _iht4[txType].Rows(input, outptr);
 54                  input = input[4..];
 55                  outptr = outptr[4..];
 56              }
 57  
 58              // Inverse transform column vectors
 59              for (i = 0; i < 4; ++i)
 60              {
 61                  for (j = 0; j < 4; ++j)
 62                  {
 63                      tempIn[j] = output[j * 4 + i];
 64                  }
 65  
 66                  _iht4[txType].Cols(tempIn, tempOut);
 67                  for (j = 0; j < 4; ++j)
 68                  {
 69                      dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4));
 70                  }
 71              }
 72          }
 73  
 74          private static readonly Transform2D[] _iht8 = {
 75              new(Idct8, Idct8), // DCT_DCT  = 0
 76              new(Iadst8, Idct8), // ADST_DCT = 1
 77              new(Idct8, Iadst8), // DCT_ADST = 2
 78              new(Iadst8, Iadst8), // ADST_ADST = 3
 79          };
 80  
 81          public static void Iht8x864Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
 82          {
 83              int i, j;
 84              Span<int> output = stackalloc int[8 * 8];
 85              Span<int> outptr = output;
 86              Span<int> tempIn = stackalloc int[8];
 87              Span<int> tempOut = stackalloc int[8];
 88              Transform2D ht = _iht8[txType];
 89  
 90              // Inverse transform row vectors
 91              for (i = 0; i < 8; ++i)
 92              {
 93                  ht.Rows(input, outptr);
 94                  input = input[8..];
 95                  outptr = outptr[8..];
 96              }
 97  
 98              // Inverse transform column vectors
 99              for (i = 0; i < 8; ++i)
100              {
101                  for (j = 0; j < 8; ++j)
102                  {
103                      tempIn[j] = output[j * 8 + i];
104                  }
105  
106                  ht.Cols(tempIn, tempOut);
107                  for (j = 0; j < 8; ++j)
108                  {
109                      dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5));
110                  }
111              }
112          }
113  
114          private static readonly Transform2D[] _iht16 = {
115              new(Idct16, Idct16), // DCT_DCT  = 0
116              new(Iadst16, Idct16), // ADST_DCT = 1
117              new(Idct16, Iadst16), // DCT_ADST = 2
118              new(Iadst16, Iadst16), // ADST_ADST = 3
119          };
120  
121          public static void Iht16x16256Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int txType)
122          {
123              int i, j;
124              Span<int> output = stackalloc int[16 * 16];
125              Span<int> outptr = output;
126              Span<int> tempIn = stackalloc int[16];
127              Span<int> tempOut = stackalloc int[16];
128              Transform2D ht = _iht16[txType];
129  
130              // Rows
131              for (i = 0; i < 16; ++i)
132              {
133                  ht.Rows(input, outptr);
134                  input = input[16..];
135                  outptr = outptr[16..];
136              }
137  
138              // Columns
139              for (i = 0; i < 16; ++i)
140              {
141                  for (j = 0; j < 16; ++j)
142                  {
143                      tempIn[j] = output[j * 16 + i];
144                  }
145  
146                  ht.Cols(tempIn, tempOut);
147                  for (j = 0; j < 16; ++j)
148                  {
149                      dest[j * stride + i] = ClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6));
150                  }
151              }
152          }
153  
154          // Idct
155          public static void Idct4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
156          {
157              if (eob > 1)
158              {
159                  Idct4x416Add(input, dest, stride);
160              }
161              else
162              {
163                  Idct4x41Add(input, dest, stride);
164              }
165          }
166  
167          public static void Iwht4x4Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
168          {
169              if (eob > 1)
170              {
171                  Iwht4x416Add(input, dest, stride);
172              }
173              else
174              {
175                  Iwht4x41Add(input, dest, stride);
176              }
177          }
178  
179          public static void Idct8x8Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
180          {
181              // If dc is 1, then input[0] is the reconstructed value, do not need
182              // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
183  
184              // The calculation can be simplified if there are not many non-zero dct
185              // coefficients. Use eobs to decide what to do.
186              if (eob == 1)
187              {
188                  // DC only DCT coefficient
189                  Idct8x81Add(input, dest, stride);
190              }
191              else if (eob <= 12)
192              {
193                  Idct8x812Add(input, dest, stride);
194              }
195              else
196              {
197                  Idct8x864Add(input, dest, stride);
198              }
199          }
200  
201          public static void Idct16x16Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
202          {
203              /* The calculation can be simplified if there are not many non-zero dct
204               * coefficients. Use eobs to separate different cases. */
205              if (eob == 1) /* DC only DCT coefficient. */
206              {
207                  Idct16x161Add(input, dest, stride);
208              }
209              else if (eob <= 10)
210              {
211                  Idct16x1610Add(input, dest, stride);
212              }
213              else if (eob <= 38)
214              {
215                  Idct16x1638Add(input, dest, stride);
216              }
217              else
218              {
219                  Idct16x16256Add(input, dest, stride);
220              }
221          }
222  
223          public static void Idct32x32Add(ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
224          {
225              if (eob == 1)
226              {
227                  Idct32x321Add(input, dest, stride);
228              }
229              else if (eob <= 34)
230              {
231                  // Non-zero coeff only in upper-left 8x8
232                  Idct32x3234Add(input, dest, stride);
233              }
234              else if (eob <= 135)
235              {
236                  // Non-zero coeff only in upper-left 16x16
237                  Idct32x32135Add(input, dest, stride);
238              }
239              else
240              {
241                  Idct32x321024Add(input, dest, stride);
242              }
243          }
244  
245          // Iht
246          public static void Iht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
247          {
248              if (txType == TxType.DctDct)
249              {
250                  Idct4x4Add(input, dest, stride, eob);
251              }
252              else
253              {
254                  Iht4x416Add(input, dest, stride, (int)txType);
255              }
256          }
257  
258          public static void Iht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest, int stride, int eob)
259          {
260              if (txType == TxType.DctDct)
261              {
262                  Idct8x8Add(input, dest, stride, eob);
263              }
264              else
265              {
266                  Iht8x864Add(input, dest, stride, (int)txType);
267              }
268          }
269  
270          public static void Iht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<byte> dest,
271                                int stride, int eob)
272          {
273              if (txType == TxType.DctDct)
274              {
275                  Idct16x16Add(input, dest, stride, eob);
276              }
277              else
278              {
279                  Iht16x16256Add(input, dest, stride, (int)txType);
280              }
281          }
282  
283          private static readonly HighbdTransform2D[] _highbdIht4 = {
284              new(HighbdIdct4, HighbdIdct4), // DCT_DCT  = 0
285              new(HighbdIadst4, HighbdIdct4), // ADST_DCT = 1
286              new(HighbdIdct4, HighbdIadst4), // DCT_ADST = 2
287              new(HighbdIadst4, HighbdIadst4), // ADST_ADST = 3
288          };
289  
290          public static void HighbdIht4x416Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
291          {
292              int i, j;
293              Span<int> output = stackalloc int[4 * 4];
294              Span<int> outptr = output;
295              Span<int> tempIn = stackalloc int[4];
296              Span<int> tempOut = stackalloc int[4];
297  
298              // Inverse transform row vectors.
299              for (i = 0; i < 4; ++i)
300              {
301                  _highbdIht4[txType].Rows(input, outptr, bd);
302                  input = input[4..];
303                  outptr = outptr[4..];
304              }
305  
306              // Inverse transform column vectors.
307              for (i = 0; i < 4; ++i)
308              {
309                  for (j = 0; j < 4; ++j)
310                  {
311                      tempIn[j] = output[j * 4 + i];
312                  }
313  
314                  _highbdIht4[txType].Cols(tempIn, tempOut, bd);
315                  for (j = 0; j < 4; ++j)
316                  {
317                      dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 4), bd);
318                  }
319              }
320          }
321  
322          private static readonly HighbdTransform2D[] _highIht8 = {
323              new(HighbdIdct8, HighbdIdct8), // DCT_DCT  = 0
324              new(HighbdIadst8, HighbdIdct8), // ADST_DCT = 1
325              new(HighbdIdct8, HighbdIadst8), // DCT_ADST = 2
326              new(HighbdIadst8, HighbdIadst8), // ADST_ADST = 3
327          };
328  
329          public static void HighbdIht8x864Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
330          {
331              int i, j;
332              Span<int> output = stackalloc int[8 * 8];
333              Span<int> outptr = output;
334              Span<int> tempIn = stackalloc int[8];
335              Span<int> tempOut = stackalloc int[8];
336              HighbdTransform2D ht = _highIht8[txType];
337  
338              // Inverse transform row vectors.
339              for (i = 0; i < 8; ++i)
340              {
341                  ht.Rows(input, outptr, bd);
342                  input = input[8..];
343                  outptr = output[8..];
344              }
345  
346              // Inverse transform column vectors.
347              for (i = 0; i < 8; ++i)
348              {
349                  for (j = 0; j < 8; ++j)
350                  {
351                      tempIn[j] = output[j * 8 + i];
352                  }
353  
354                  ht.Cols(tempIn, tempOut, bd);
355                  for (j = 0; j < 8; ++j)
356                  {
357                      dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 5), bd);
358                  }
359              }
360          }
361  
362          private static readonly HighbdTransform2D[] _highIht16 = {
363              new(HighbdIdct16, HighbdIdct16), // DCT_DCT  = 0
364              new(HighbdIadst16, HighbdIdct16), // ADST_DCT = 1
365              new(HighbdIdct16, HighbdIadst16), // DCT_ADST = 2
366              new(HighbdIadst16, HighbdIadst16), // ADST_ADST = 3
367          };
368  
369          public static void HighbdIht16x16256Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int txType, int bd)
370          {
371              int i, j;
372              Span<int> output = stackalloc int[16 * 16];
373              Span<int> outptr = output;
374              Span<int> tempIn = stackalloc int[16];
375              Span<int> tempOut = stackalloc int[16];
376              HighbdTransform2D ht = _highIht16[txType];
377  
378              // Rows
379              for (i = 0; i < 16; ++i)
380              {
381                  ht.Rows(input, outptr, bd);
382                  input = input[16..];
383                  outptr = output[16..];
384              }
385  
386              // Columns
387              for (i = 0; i < 16; ++i)
388              {
389                  for (j = 0; j < 16; ++j)
390                  {
391                      tempIn[j] = output[j * 16 + i];
392                  }
393  
394                  ht.Cols(tempIn, tempOut, bd);
395                  for (j = 0; j < 16; ++j)
396                  {
397                      dest[j * stride + i] = HighbdClipPixelAdd(dest[j * stride + i], BitUtils.RoundPowerOfTwo(tempOut[j], 6), bd);
398                  }
399              }
400          }
401  
402          // Idct
403          public static void HighbdIdct4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
404          {
405              if (eob > 1)
406              {
407                  HighbdIdct4x416Add(input, dest, stride, bd);
408              }
409              else
410              {
411                  HighbdIdct4x41Add(input, dest, stride, bd);
412              }
413          }
414  
415          public static void HighbdIwht4x4Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
416          {
417              if (eob > 1)
418              {
419                  HighbdIwht4x416Add(input, dest, stride, bd);
420              }
421              else
422              {
423                  HighbdIwht4x41Add(input, dest, stride, bd);
424              }
425          }
426  
427          public static void HighbdIdct8x8Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
428          {
429              // If dc is 1, then input[0] is the reconstructed value, do not need
430              // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
431  
432              // The calculation can be simplified if there are not many non-zero dct
433              // coefficients. Use eobs to decide what to do.
434              // DC only DCT coefficient
435              if (eob == 1)
436              {
437                  Vpx_Highbdidct8x8_1_add_c(input, dest, stride, bd);
438              }
439              else if (eob <= 12)
440              {
441                  HighbdIdct8x812Add(input, dest, stride, bd);
442              }
443              else
444              {
445                  HighbdIdct8x864Add(input, dest, stride, bd);
446              }
447          }
448  
449          public static void HighbdIdct16x16Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
450          {
451              // The calculation can be simplified if there are not many non-zero dct
452              // coefficients. Use eobs to separate different cases.
453              // DC only DCT coefficient.
454              if (eob == 1)
455              {
456                  HighbdIdct16x161Add(input, dest, stride, bd);
457              }
458              else if (eob <= 10)
459              {
460                  HighbdIdct16x1610Add(input, dest, stride, bd);
461              }
462              else if (eob <= 38)
463              {
464                  HighbdIdct16x1638Add(input, dest, stride, bd);
465              }
466              else
467              {
468                  HighbdIdct16x16256Add(input, dest, stride, bd);
469              }
470          }
471  
472          public static void HighbdIdct32x32Add(ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
473          {
474              // Non-zero coeff only in upper-left 8x8
475              if (eob == 1)
476              {
477                  HighbdIdct32x321Add(input, dest, stride, bd);
478              }
479              else if (eob <= 34)
480              {
481                  HighbdIdct32x3234Add(input, dest, stride, bd);
482              }
483              else if (eob <= 135)
484              {
485                  HighbdIdct32x32135Add(input, dest, stride, bd);
486              }
487              else
488              {
489                  HighbdIdct32x321024Add(input, dest, stride, bd);
490              }
491          }
492  
493          // Iht
494          public static void HighbdIht4x4Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
495          {
496              if (txType == TxType.DctDct)
497              {
498                  HighbdIdct4x4Add(input, dest, stride, eob, bd);
499              }
500              else
501              {
502                  HighbdIht4x416Add(input, dest, stride, (int)txType, bd);
503              }
504          }
505  
506          public static void HighbdIht8x8Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
507          {
508              if (txType == TxType.DctDct)
509              {
510                  HighbdIdct8x8Add(input, dest, stride, eob, bd);
511              }
512              else
513              {
514                  HighbdIht8x864Add(input, dest, stride, (int)txType, bd);
515              }
516          }
517  
518          public static void HighbdIht16x16Add(TxType txType, ReadOnlySpan<int> input, Span<ushort> dest, int stride, int eob, int bd)
519          {
520              if (txType == TxType.DctDct)
521              {
522                  HighbdIdct16x16Add(input, dest, stride, eob, bd);
523              }
524              else
525              {
526                  HighbdIht16x16256Add(input, dest, stride, (int)txType, bd);
527              }
528          }
529      }
530  }