/ src / Ryujinx.Graphics.Nvdec.Vp9 / LoopFilter.cs
LoopFilter.cs
  1  using Ryujinx.Common.Memory;
  2  using Ryujinx.Graphics.Nvdec.Vp9.Common;
  3  using Ryujinx.Graphics.Nvdec.Vp9.Types;
  4  using System;
  5  using System.Runtime.InteropServices;
  6  
  7  namespace Ryujinx.Graphics.Nvdec.Vp9
  8  {
  9      internal static class LoopFilter
 10      {
 11          public const int MaxLoopFilter = 63;
 12  
 13          public const int MaxRefLfDeltas = 4;
 14          public const int MaxModeLfDeltas = 2;
 15  
 16          // 64 bit masks for left transform size. Each 1 represents a position where
 17          // we should apply a loop filter across the left border of an 8x8 block
 18          // boundary.
 19          //
 20          // In the case of TX_16X16 (low order byte first) we end up with
 21          // a mask that looks like this:
 22          //
 23          //    10101010
 24          //    10101010
 25          //    10101010
 26          //    10101010
 27          //    10101010
 28          //    10101010
 29          //    10101010
 30          //    10101010
 31          //
 32          // A loop filter should be applied to every other 8x8 block horizontally.
 33          private static readonly ulong[] _left64X64TxformMask = {
 34              0xffffffffffffffffUL, // TX_4X4
 35              0xffffffffffffffffUL, // TX_8x8
 36              0x5555555555555555UL, // TX_16x16
 37              0x1111111111111111UL, // TX_32x32
 38          };
 39  
 40          // 64 bit masks for above transform size. Each 1 represents a position where
 41          // we should apply a loop filter across the top border of an 8x8 block
 42          // boundary.
 43          //
 44          // In the case of TX_32x32 (low order byte first) we end up with
 45          // a mask that looks like this:
 46          //
 47          //    11111111
 48          //    00000000
 49          //    00000000
 50          //    00000000
 51          //    11111111
 52          //    00000000
 53          //    00000000
 54          //    00000000
 55          //
 56          // A loop filter should be applied to every fourth row vertically.
 57          private static readonly ulong[] _above64X64TxformMask = {
 58              0xffffffffffffffffUL, // TX_4X4
 59              0xffffffffffffffffUL, // TX_8x8
 60              0x00ff00ff00ff00ffUL, // TX_16x16
 61              0x000000ff000000ffUL, // TX_32x32
 62          };
 63  
 64          // 64 bit masks for prediction sizes (left). Each 1 represents the left
 65          // border of an 8x8 block. These are aligned to the rightmost appropriate
 66          // bit, and then shifted into place.
 67          //
 68          // In the case of BLOCK_16X32 (low order byte first) we end up with
 69          // a mask that looks like this:
 70          //
 71          //  10000000
 72          //  10000000
 73          //  10000000
 74          //  10000000
 75          //  00000000
 76          //  00000000
 77          //  00000000
 78          //  00000000
 79          private static readonly ulong[] _leftPredictionMask = {
 80              0x0000000000000001UL, // BLOCK_4X4,
 81              0x0000000000000001UL, // BLOCK_4X8,
 82              0x0000000000000001UL, // BLOCK_8X4,
 83              0x0000000000000001UL, // BLOCK_8X8,
 84              0x0000000000000101UL, // BLOCK_8X16,
 85              0x0000000000000001UL, // BLOCK_16X8,
 86              0x0000000000000101UL, // BLOCK_16X16,
 87              0x0000000001010101UL, // BLOCK_16X32,
 88              0x0000000000000101UL, // BLOCK_32X16,
 89              0x0000000001010101UL, // BLOCK_32X32,
 90              0x0101010101010101UL, // BLOCK_32X64,
 91              0x0000000001010101UL, // BLOCK_64X32,
 92              0x0101010101010101UL, // BLOCK_64X64
 93          };
 94  
 95          // 64 bit mask to shift and set for each prediction size.
 96          private static readonly ulong[] _abovePredictionMask = {
 97              0x0000000000000001UL, // BLOCK_4X4
 98              0x0000000000000001UL, // BLOCK_4X8
 99              0x0000000000000001UL, // BLOCK_8X4
100              0x0000000000000001UL, // BLOCK_8X8
101              0x0000000000000001UL, // BLOCK_8X16,
102              0x0000000000000003UL, // BLOCK_16X8
103              0x0000000000000003UL, // BLOCK_16X16
104              0x0000000000000003UL, // BLOCK_16X32,
105              0x000000000000000fUL, // BLOCK_32X16,
106              0x000000000000000fUL, // BLOCK_32X32,
107              0x000000000000000fUL, // BLOCK_32X64,
108              0x00000000000000ffUL, // BLOCK_64X32,
109              0x00000000000000ffUL, // BLOCK_64X64
110          };
111  
112          // 64 bit mask to shift and set for each prediction size. A bit is set for
113          // each 8x8 block that would be in the left most block of the given block
114          // size in the 64x64 block.
115          private static readonly ulong[] _sizeMask = {
116              0x0000000000000001UL, // BLOCK_4X4
117              0x0000000000000001UL, // BLOCK_4X8
118              0x0000000000000001UL, // BLOCK_8X4
119              0x0000000000000001UL, // BLOCK_8X8
120              0x0000000000000101UL, // BLOCK_8X16,
121              0x0000000000000003UL, // BLOCK_16X8
122              0x0000000000000303UL, // BLOCK_16X16
123              0x0000000003030303UL, // BLOCK_16X32,
124              0x0000000000000f0fUL, // BLOCK_32X16,
125              0x000000000f0f0f0fUL, // BLOCK_32X32,
126              0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64,
127              0x00000000ffffffffUL, // BLOCK_64X32,
128              0xffffffffffffffffUL, // BLOCK_64X64
129          };
130  
131          // These are used for masking the left and above borders.
132  #pragma warning disable IDE0051 // Remove unused private member
133          private const ulong LeftBorder = 0x1111111111111111UL;
134          private const ulong AboveBorder = 0x000000ff000000ffUL;
135  #pragma warning restore IDE0051
136  
137          // 16 bit masks for uv transform sizes.
138          private static readonly ushort[] _left64X64TxformMaskUv = {
139              0xffff, // TX_4X4
140              0xffff, // TX_8x8
141              0x5555, // TX_16x16
142              0x1111, // TX_32x32
143          };
144  
145          private static readonly ushort[] _above64X64TxformMaskUv = {
146              0xffff, // TX_4X4
147              0xffff, // TX_8x8
148              0x0f0f, // TX_16x16
149              0x000f, // TX_32x32
150          };
151  
152          // 16 bit left mask to shift and set for each uv prediction size.
153          private static readonly ushort[] _leftPredictionMaskUv = {
154              0x0001, // BLOCK_4X4,
155              0x0001, // BLOCK_4X8,
156              0x0001, // BLOCK_8X4,
157              0x0001, // BLOCK_8X8,
158              0x0001, // BLOCK_8X16,
159              0x0001, // BLOCK_16X8,
160              0x0001, // BLOCK_16X16,
161              0x0011, // BLOCK_16X32,
162              0x0001, // BLOCK_32X16,
163              0x0011, // BLOCK_32X32,
164              0x1111, // BLOCK_32X64
165              0x0011, // BLOCK_64X32,
166              0x1111, // BLOCK_64X64
167          };
168  
169          // 16 bit above mask to shift and set for each uv prediction size.
170          private static readonly ushort[] _abovePredictionMaskUv = {
171              0x0001, // BLOCK_4X4
172              0x0001, // BLOCK_4X8
173              0x0001, // BLOCK_8X4
174              0x0001, // BLOCK_8X8
175              0x0001, // BLOCK_8X16,
176              0x0001, // BLOCK_16X8
177              0x0001, // BLOCK_16X16
178              0x0001, // BLOCK_16X32,
179              0x0003, // BLOCK_32X16,
180              0x0003, // BLOCK_32X32,
181              0x0003, // BLOCK_32X64,
182              0x000f, // BLOCK_64X32,
183              0x000f, // BLOCK_64X64
184          };
185  
186          // 16 bit mask to shift and set for each uv prediction size
187          private static readonly ushort[] _sizeMaskUv = {
188              0x0001, // BLOCK_4X4
189              0x0001, // BLOCK_4X8
190              0x0001, // BLOCK_8X4
191              0x0001, // BLOCK_8X8
192              0x0001, // BLOCK_8X16,
193              0x0001, // BLOCK_16X8
194              0x0001, // BLOCK_16X16
195              0x0011, // BLOCK_16X32,
196              0x0003, // BLOCK_32X16,
197              0x0033, // BLOCK_32X32,
198              0x3333, // BLOCK_32X64,
199              0x00ff, // BLOCK_64X32,
200              0xffff, // BLOCK_64X64
201          };
202  
203  #pragma warning disable IDE0051 // Remove unused private member
204          private const ushort LeftBorderUv = 0x1111;
205          private const ushort AboveBorderUv = 0x000f;
206  #pragma warning restore IDE0051
207  
208          private static readonly int[] _modeLfLut = {
209              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
210              1, 1, 0, 1, // INTER_MODES (ZEROMV == 0)
211          };
212  
213          private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
214          {
215              return lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][_modeLfLut[(int)mi.Mode]];
216          }
217  
218          private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
219          {
220              return ref lf.Lfm[(miCol >> 3) + ((miRow >> 3) * lf.LfmStride)];
221          }
222  
223          // 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
224          // or greater area.
225          private static readonly byte[][] _firstBlockIn16X16 = {
226              new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
227              new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
228              new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
229              new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
230          };
231  
232          // This function sets up the bit masks for a block represented
233          // by miRow, miCol in a 64x64 region.
234          public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
235          {
236              BlockSize blockSize = mi.SbType;
237              TxSize txSizeY = mi.TxSize;
238              ref LoopFilterInfoN lfiN = ref cm.LfInfo;
239              int filterLevel = GetFilterLevel(ref lfiN, ref mi);
240              TxSize txSizeUv = Luts.UvTxsizeLookup[(int)blockSize][(int)txSizeY][1][1];
241              ref LoopFilterMask lfm = ref GetLfm(ref cm.Lf, miRow, miCol);
242              ref ulong leftY = ref lfm.LeftY[(int)txSizeY];
243              ref ulong aboveY = ref lfm.AboveY[(int)txSizeY];
244              ref ulong int4X4Y = ref lfm.Int4x4Y;
245              ref ushort leftUv = ref lfm.LeftUv[(int)txSizeUv];
246              ref ushort aboveUv = ref lfm.AboveUv[(int)txSizeUv];
247              ref ushort int4X4Uv = ref lfm.Int4x4Uv;
248              int rowInSb = (miRow & 7);
249              int colInSb = (miCol & 7);
250              int shiftY = colInSb + (rowInSb << 3);
251              int shiftUv = (colInSb >> 1) + ((rowInSb >> 1) << 2);
252              int buildUv = _firstBlockIn16X16[rowInSb][colInSb];
253  
254              if (filterLevel == 0)
255              {
256                  return;
257              }
258  
259              int index = shiftY;
260              int i;
261              for (i = 0; i < bh; i++)
262              {
263                  MemoryMarshal.CreateSpan(ref lfm.LflY[index], 64 - index)[..bw].Fill((byte)filterLevel);
264                  index += 8;
265              }
266  
267              // These set 1 in the current block size for the block size edges.
268              // For instance if the block size is 32x16, we'll set:
269              //    above =   1111
270              //              0000
271              //    and
272              //    left  =   1000
273              //          =   1000
274              // NOTE : In this example the low bit is left most ( 1000 ) is stored as
275              //        1,  not 8...
276              //
277              // U and V set things on a 16 bit scale.
278              //
279              aboveY |= _abovePredictionMask[(int)blockSize] << shiftY;
280              leftY |= _leftPredictionMask[(int)blockSize] << shiftY;
281  
282              if (buildUv != 0)
283              {
284                  aboveUv |= (ushort)(_abovePredictionMaskUv[(int)blockSize] << shiftUv);
285                  leftUv |= (ushort)(_leftPredictionMaskUv[(int)blockSize] << shiftUv);
286              }
287  
288              // If the block has no coefficients and is not intra we skip applying
289              // the loop filter on block edges.
290              if (mi.Skip != 0 && mi.IsInterBlock())
291              {
292                  return;
293              }
294  
295              // Add a mask for the transform size. The transform size mask is set to
296              // be correct for a 64x64 prediction block size. Mask to match the size of
297              // the block we are working on and then shift it into place.
298              aboveY |= (_sizeMask[(int)blockSize] & _above64X64TxformMask[(int)txSizeY]) << shiftY;
299              leftY |= (_sizeMask[(int)blockSize] & _left64X64TxformMask[(int)txSizeY]) << shiftY;
300  
301              if (buildUv != 0)
302              {
303                  aboveUv |= (ushort)((_sizeMaskUv[(int)blockSize] & _above64X64TxformMaskUv[(int)txSizeUv]) << shiftUv);
304                  leftUv |= (ushort)((_sizeMaskUv[(int)blockSize] & _left64X64TxformMaskUv[(int)txSizeUv]) << shiftUv);
305              }
306  
307              // Try to determine what to do with the internal 4x4 block boundaries. These
308              // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
309              // internal ones can be skipped and don't depend on the prediction block size.
310              if (txSizeY == TxSize.Tx4x4)
311              {
312                  int4X4Y |= _sizeMask[(int)blockSize] << shiftY;
313              }
314  
315              if (buildUv != 0 && txSizeUv == TxSize.Tx4x4)
316              {
317                  int4X4Uv |= (ushort)((_sizeMaskUv[(int)blockSize] & 0xffff) << shiftUv);
318              }
319          }
320  
321          public static unsafe void ResetLfm(ref Vp9Common cm)
322          {
323              if (cm.Lf.FilterLevel != 0)
324              {
325                  MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), ((cm.MiRows + (Constants.MiBlockSize - 1)) >> 3) * cm.Lf.LfmStride);
326              }
327          }
328  
329          private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
330          {
331              int lvl;
332  
333              // For each possible value for the loop filter fill out limits
334              for (lvl = 0; lvl <= MaxLoopFilter; lvl++)
335              {
336                  // Set loop filter parameters that control sharpness.
337                  int blockInsideLimit = lvl >> ((sharpnessLvl > 0 ? 1 : 0) + (sharpnessLvl > 4 ? 1 : 0));
338  
339                  if (sharpnessLvl > 0)
340                  {
341                      if (blockInsideLimit > (9 - sharpnessLvl))
342                      {
343                          blockInsideLimit = (9 - sharpnessLvl);
344                      }
345                  }
346  
347                  if (blockInsideLimit < 1)
348                  {
349                      blockInsideLimit = 1;
350                  }
351  
352                  lfi.Lfthr[lvl].Lim.AsSpan().Fill((byte)blockInsideLimit);
353                  lfi.Lfthr[lvl].Mblim.AsSpan().Fill((byte)(2 * (lvl + 2) + blockInsideLimit));
354              }
355          }
356  
357          public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
358          {
359              int segId;
360              // nShift is the multiplier for lfDeltas
361              // the multiplier is 1 for when filterLvl is between 0 and 31;
362              // 2 when filterLvl is between 32 and 63
363              int scale = 1 << (defaultFiltLvl >> 5);
364              ref LoopFilterInfoN lfi = ref cm.LfInfo;
365              ref Types.LoopFilter lf = ref cm.Lf;
366              ref Segmentation seg = ref cm.Seg;
367  
368              // Update limits if sharpness has changed
369              if (lf.LastSharpnessLevel != lf.SharpnessLevel)
370              {
371                  UpdateSharpness(ref lfi, lf.SharpnessLevel);
372                  lf.LastSharpnessLevel = lf.SharpnessLevel;
373              }
374  
375              for (segId = 0; segId < Constants.MaxSegments; segId++)
376              {
377                  int lvlSeg = defaultFiltLvl;
378                  if (seg.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
379                  {
380                      int data = seg.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
381                      lvlSeg = Math.Clamp(seg.AbsDelta == Constants.SegmentAbsData ? data : defaultFiltLvl + data, 0, MaxLoopFilter);
382                  }
383  
384                  if (!lf.ModeRefDeltaEnabled)
385                  {
386                      // We could get rid of this if we assume that deltas are set to
387                      // zero when not in use; encoder always uses deltas
388                      MemoryMarshal.Cast<Array2<byte>, byte>(lfi.Lvl[segId].AsSpan()).Fill((byte)lvlSeg);
389                  }
390                  else
391                  {
392                      int refr, mode;
393                      int intraLvl = lvlSeg + lf.RefDeltas[Constants.IntraFrame] * scale;
394                      lfi.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLvl, 0, MaxLoopFilter);
395  
396                      for (refr = Constants.LastFrame; refr < Constants.MaxRefFrames; ++refr)
397                      {
398                          for (mode = 0; mode < MaxModeLfDeltas; ++mode)
399                          {
400                              int interLvl = lvlSeg + lf.RefDeltas[refr] * scale + lf.ModeDeltas[mode] * scale;
401                              lfi.Lvl[segId][refr][mode] = (byte)Math.Clamp(interLvl, 0, MaxLoopFilter);
402                          }
403                      }
404                  }
405              }
406          }
407      }
408  }