LoopFilter.cs
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Common;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Nvdec.Vp9
{
    /// <summary>
    /// Loop filter set-up: builds the edge bit masks of a 64x64 region block by block,
    /// and derives the per-segment filter levels and the sharpness limits.
    /// </summary>
    internal static class LoopFilter
    {
        /// <summary>Upper bound of a filter level; computed levels are clamped to [0, MaxLoopFilter].</summary>
        public const int MaxLoopFilter = 63;

        public const int MaxRefLfDeltas = 4;

        /// <summary>Number of mode delta slots stored per reference frame in the level table.</summary>
        public const int MaxModeLfDeltas = 2;

        // Luma masks for the left edges, indexed by transform size. A set bit marks
        // an 8x8 position whose left border must be filtered.
        //
        // For TX_16X16, printing each byte low order bit first, the mask reads:
        //
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //    10101010
        //
        // i.e. every second 8x8 column gets a filtered left edge.
        private static readonly ulong[] _left64X64TxformMask = {
            0xffffffffffffffffUL, // TX_4X4
            0xffffffffffffffffUL, // TX_8x8
            0x5555555555555555UL, // TX_16x16
            0x1111111111111111UL, // TX_32x32
        };

        // Luma masks for the top edges, indexed by transform size. A set bit marks
        // an 8x8 position whose top border must be filtered.
        //
        // For TX_32x32, printing each byte low order bit first, the mask reads:
        //
        //    11111111
        //    00000000
        //    00000000
        //    00000000
        //    11111111
        //    00000000
        //    00000000
        //    00000000
        //
        // i.e. every fourth 8x8 row gets a filtered top edge.
        private static readonly ulong[] _above64X64TxformMask = {
            0xffffffffffffffffUL, // TX_4X4
            0xffffffffffffffffUL, // TX_8x8
            0x00ff00ff00ff00ffUL, // TX_16x16
            0x000000ff000000ffUL, // TX_32x32
        };

        // Luma masks for the left edge of a prediction block, indexed by block size.
        // A set bit marks an 8x8 position on the left border of the block. The masks
        // are anchored at bit 0 and shifted into place by the caller.
        //
        // For BLOCK_16X32, printing each byte low order bit first, the mask reads:
        //
        //    10000000
        //    10000000
        //    10000000
        //    10000000
        //    00000000
        //    00000000
        //    00000000
        //    00000000
        private static readonly ulong[] _leftPredictionMask = {
            0x0000000000000001UL, // BLOCK_4X4,
            0x0000000000000001UL, // BLOCK_4X8,
            0x0000000000000001UL, // BLOCK_8X4,
            0x0000000000000001UL, // BLOCK_8X8,
            0x0000000000000101UL, // BLOCK_8X16,
            0x0000000000000001UL, // BLOCK_16X8,
            0x0000000000000101UL, // BLOCK_16X16,
            0x0000000001010101UL, // BLOCK_16X32,
            0x0000000000000101UL, // BLOCK_32X16,
            0x0000000001010101UL, // BLOCK_32X32,
            0x0101010101010101UL, // BLOCK_32X64,
            0x0000000001010101UL, // BLOCK_64X32,
            0x0101010101010101UL, // BLOCK_64X64
        };

        // Luma masks for the top edge of a prediction block, indexed by block size;
        // anchored at bit 0 and shifted into place by the caller.
        private static readonly ulong[] _abovePredictionMask = {
            0x0000000000000001UL, // BLOCK_4X4
            0x0000000000000001UL, // BLOCK_4X8
            0x0000000000000001UL, // BLOCK_8X4
            0x0000000000000001UL, // BLOCK_8X8
            0x0000000000000001UL, // BLOCK_8X16,
            0x0000000000000003UL, // BLOCK_16X8
            0x0000000000000003UL, // BLOCK_16X16
            0x0000000000000003UL, // BLOCK_16X32,
            0x000000000000000fUL, // BLOCK_32X16,
            0x000000000000000fUL, // BLOCK_32X32,
            0x000000000000000fUL, // BLOCK_32X64,
            0x00000000000000ffUL, // BLOCK_64X32,
            0x00000000000000ffUL, // BLOCK_64X64
        };

        // Luma area masks, indexed by block size: one bit for every 8x8 position the
        // block covers when it sits in the first position of the 64x64 region.
        // Shifted into place by the caller.
        private static readonly ulong[] _sizeMask = {
            0x0000000000000001UL, // BLOCK_4X4
            0x0000000000000001UL, // BLOCK_4X8
            0x0000000000000001UL, // BLOCK_8X4
            0x0000000000000001UL, // BLOCK_8X8
            0x0000000000000101UL, // BLOCK_8X16,
            0x0000000000000003UL, // BLOCK_16X8
            0x0000000000000303UL, // BLOCK_16X16
            0x0000000003030303UL, // BLOCK_16X32,
            0x0000000000000f0fUL, // BLOCK_32X16,
            0x000000000f0f0f0fUL, // BLOCK_32X32,
            0x0f0f0f0f0f0f0f0fUL, // BLOCK_32X64,
            0x00000000ffffffffUL, // BLOCK_64X32,
            0xffffffffffffffffUL, // BLOCK_64X64
        };

        // Masks for the left and above borders. Nothing in this file references them,
        // hence the suppressed "unused private member" warning.
#pragma warning disable IDE0051 // Remove unused private member
        private const ulong LeftBorder = 0x1111111111111111UL;
        private const ulong AboveBorder = 0x000000ff000000ffUL;
#pragma warning restore IDE0051

        // 16 bit chroma counterparts of the transform size masks (left edges).
        private static readonly ushort[] _left64X64TxformMaskUv = {
            0xffff, // TX_4X4
            0xffff, // TX_8x8
            0x5555, // TX_16x16
            0x1111, // TX_32x32
        };

        // 16 bit chroma counterparts of the transform size masks (top edges).
        private static readonly ushort[] _above64X64TxformMaskUv = {
            0xffff, // TX_4X4
            0xffff, // TX_8x8
            0x0f0f, // TX_16x16
            0x000f, // TX_32x32
        };

        // 16 bit chroma left edge mask per prediction block size; shifted into place by the caller.
        private static readonly ushort[] _leftPredictionMaskUv = {
            0x0001, // BLOCK_4X4,
            0x0001, // BLOCK_4X8,
            0x0001, // BLOCK_8X4,
            0x0001, // BLOCK_8X8,
            0x0001, // BLOCK_8X16,
            0x0001, // BLOCK_16X8,
            0x0001, // BLOCK_16X16,
            0x0011, // BLOCK_16X32,
            0x0001, // BLOCK_32X16,
            0x0011, // BLOCK_32X32,
            0x1111, // BLOCK_32X64
            0x0011, // BLOCK_64X32,
            0x1111, // BLOCK_64X64
        };

        // 16 bit chroma top edge mask per prediction block size; shifted into place by the caller.
        private static readonly ushort[] _abovePredictionMaskUv = {
            0x0001, // BLOCK_4X4
            0x0001, // BLOCK_4X8
            0x0001, // BLOCK_8X4
            0x0001, // BLOCK_8X8
            0x0001, // BLOCK_8X16,
            0x0001, // BLOCK_16X8
            0x0001, // BLOCK_16X16
            0x0001, // BLOCK_16X32,
            0x0003, // BLOCK_32X16,
            0x0003, // BLOCK_32X32,
            0x0003, // BLOCK_32X64,
            0x000f, // BLOCK_64X32,
            0x000f, // BLOCK_64X64
        };

        // 16 bit chroma area mask per prediction block size; shifted into place by the caller.
        private static readonly ushort[] _sizeMaskUv = {
            0x0001, // BLOCK_4X4
            0x0001, // BLOCK_4X8
            0x0001, // BLOCK_8X4
            0x0001, // BLOCK_8X8
            0x0001, // BLOCK_8X16,
            0x0001, // BLOCK_16X8
            0x0001, // BLOCK_16X16
            0x0011, // BLOCK_16X32,
            0x0003, // BLOCK_32X16,
            0x0033, // BLOCK_32X32,
            0x3333, // BLOCK_32X64,
            0x00ff, // BLOCK_64X32,
            0xffff, // BLOCK_64X64
        };

        // Chroma border masks; unreferenced in this file, like their luma counterparts.
#pragma warning disable IDE0051 // Remove unused private member
        private const ushort LeftBorderUv = 0x1111;
        private const ushort AboveBorderUv = 0x000f;
#pragma warning restore IDE0051

        // Maps a prediction mode to the mode slot (last index) of the level table.
        private static readonly int[] _modeLfLut = {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
            1, 1, 0, 1, // INTER_MODES (ZEROMV == 0)
        };

        /// <summary>
        /// Looks up the filter level of a block from its segment id, its first
        /// reference frame and the mode slot of its prediction mode.
        /// </summary>
        private static byte GetFilterLevel(ref LoopFilterInfoN lfiN, ref ModeInfo mi)
            => lfiN.Lvl[mi.SegmentId][mi.RefFrame[0]][_modeLfLut[(int)mi.Mode]];

        /// <summary>
        /// Returns the mask entry covering the given position: both coordinates are
        /// divided by 8 and combined using the row stride <c>LfmStride</c>.
        /// </summary>
        private static ref LoopFilterMask GetLfm(ref Types.LoopFilter lf, int miRow, int miCol)
        {
            int entry = ((miRow >> 3) * lf.LfmStride) + (miCol >> 3);

            return ref lf.Lfm[entry];
        }

        // 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
        // or greater area.
        private static readonly byte[][] _firstBlockIn16X16 = {
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
            new byte[] { 1, 0, 1, 0, 1, 0, 1, 0 }, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0 },
        };

        /// <summary>
        /// Records the filter level and the edge bit masks of one block inside the
        /// mask entry of the 64x64 region that contains it.
        /// </summary>
        /// <param name="cm">Decoder state holding the level table and the mask entries.</param>
        /// <param name="mi">Mode info of the block (size, transform size, skip flag, ...).</param>
        /// <param name="miRow">Row of the block; the low three bits give its row inside the region.</param>
        /// <param name="miCol">Column of the block; the low three bits give its column inside the region.</param>
        /// <param name="bw">Number of consecutive level entries written per row.</param>
        /// <param name="bh">Number of rows of level entries written (8 entries per row).</param>
        public static void BuildMask(ref Vp9Common cm, ref ModeInfo mi, int miRow, int miCol, int bw, int bh)
        {
            BlockSize bsize = mi.SbType;
            TxSize lumaTx = mi.TxSize;
            int sizeIdx = (int)bsize;
            int lumaTxIdx = (int)lumaTx;

            ref LoopFilterInfoN levelInfo = ref cm.LfInfo;
            int level = GetFilterLevel(ref levelInfo, ref mi);

            TxSize chromaTx = Luts.UvTxsizeLookup[sizeIdx][lumaTxIdx][1][1];
            int chromaTxIdx = (int)chromaTx;

            // All mask references are taken up front, before the early exit below.
            ref LoopFilterMask mask = ref GetLfm(ref cm.Lf, miRow, miCol);
            ref ulong lumaLeft = ref mask.LeftY[lumaTxIdx];
            ref ulong lumaAbove = ref mask.AboveY[lumaTxIdx];
            ref ulong lumaInner = ref mask.Int4x4Y;
            ref ushort chromaLeft = ref mask.LeftUv[chromaTxIdx];
            ref ushort chromaAbove = ref mask.AboveUv[chromaTxIdx];
            ref ushort chromaInner = ref mask.Int4x4Uv;

            int sbRow = miRow & 7;
            int sbCol = miCol & 7;

            // Luma uses an 8x8 bit grid, chroma a 4x4 bit grid.
            int lumaShift = (sbRow << 3) + sbCol;
            int chromaShift = ((sbRow >> 1) << 2) + (sbCol >> 1);

            // Chroma masks are only built from the first 8x8 block of each 16x16 area.
            bool hasChroma = _firstBlockIn16X16[sbRow][sbCol] != 0;

            // Level zero means nothing to filter for this block.
            if (level == 0)
            {
                return;
            }

            // Store the level for every position covered by the block, row by row.
            byte levelByte = (byte)level;
            for (int row = 0, offset = lumaShift; row < bh; row++, offset += 8)
            {
                MemoryMarshal.CreateSpan(ref mask.LflY[offset], 64 - offset).Slice(0, bw).Fill(levelByte);
            }

            // Mark the outer edges of the prediction block.
            // For instance if the block size is 32x16, we'll set:
            //    above =   1111
            //              0000
            //    and
            //    left  =   1000
            //          =   1000
            // NOTE: in this picture the low bit is the left most one, so "1000" is
            // stored as 1, not 8.
            //
            // The chroma masks work the same way on a 16 bit scale.
            lumaAbove |= _abovePredictionMask[sizeIdx] << lumaShift;
            lumaLeft |= _leftPredictionMask[sizeIdx] << lumaShift;

            if (hasChroma)
            {
                chromaAbove |= (ushort)(_abovePredictionMaskUv[sizeIdx] << chromaShift);
                chromaLeft |= (ushort)(_leftPredictionMaskUv[sizeIdx] << chromaShift);
            }

            // A skipped inter block has no coefficients: only its outer edges are
            // filtered, so the transform edges below are left out.
            if (mi.Skip != 0 && mi.IsInterBlock())
            {
                return;
            }

            // Transform edges: the transform size masks describe a full 64x64 block,
            // so they are cut down to the area of this block before being shifted
            // into place.
            ulong lumaArea = _sizeMask[sizeIdx];
            lumaAbove |= (lumaArea & _above64X64TxformMask[lumaTxIdx]) << lumaShift;
            lumaLeft |= (lumaArea & _left64X64TxformMask[lumaTxIdx]) << lumaShift;

            // Internal 4x4 boundaries differ from the 4x4 boundaries on the outside
            // edge of an 8x8 block: they can be skipped and do not depend on the
            // prediction block size.
            if (lumaTx == TxSize.Tx4x4)
            {
                lumaInner |= lumaArea << lumaShift;
            }

            if (!hasChroma)
            {
                return;
            }

            ushort chromaArea = _sizeMaskUv[sizeIdx];
            chromaAbove |= (ushort)((chromaArea & _above64X64TxformMaskUv[chromaTxIdx]) << chromaShift);
            chromaLeft |= (ushort)((chromaArea & _left64X64TxformMaskUv[chromaTxIdx]) << chromaShift);

            if (chromaTx == TxSize.Tx4x4)
            {
                chromaInner |= (ushort)(chromaArea << chromaShift);
            }
        }

        /// <summary>
        /// Overwrites the mask entries with default-constructed values when the frame
        /// filter level is non-zero; does nothing otherwise.
        /// </summary>
        /// <param name="cm">Decoder state owning the mask entries.</param>
        public static unsafe void ResetLfm(ref Vp9Common cm)
        {
            if (cm.Lf.FilterLevel == 0)
            {
                return;
            }

            // Rows of entries: MiRows rounded up to a multiple of the block size, then divided by 8.
            int entryRows = (cm.MiRows + (Constants.MiBlockSize - 1)) >> 3;

            MemoryUtil.Fill(cm.Lf.Lfm.ToPointer(), new LoopFilterMask(), entryRows * cm.Lf.LfmStride);
        }

        /// <summary>
        /// Recomputes the inside limit and the block limit of every filter level
        /// (0..MaxLoopFilter) for the given sharpness.
        /// </summary>
        /// <param name="lfi">Table receiving the limits.</param>
        /// <param name="sharpnessLvl">Sharpness level the limits are derived from.</param>
        private static void UpdateSharpness(ref LoopFilterInfoN lfi, int sharpnessLvl)
        {
            // The level is shifted right once when the sharpness is above 0 and once
            // more when it is above 4.
            int shift = 0;
            if (sharpnessLvl > 0)
            {
                shift++;
            }

            if (sharpnessLvl > 4)
            {
                shift++;
            }

            bool sharpen = sharpnessLvl > 0;
            int ceiling = 9 - sharpnessLvl;

            for (int level = 0; level <= MaxLoopFilter; level++)
            {
                int insideLimit = level >> shift;

                if (sharpen)
                {
                    insideLimit = Math.Min(insideLimit, ceiling);
                }

                // The inside limit never drops below 1.
                insideLimit = Math.Max(insideLimit, 1);

                lfi.Lfthr[level].Lim.AsSpan().Fill((byte)insideLimit);
                lfi.Lfthr[level].Mblim.AsSpan().Fill((byte)(2 * (level + 2) + insideLimit));
            }
        }

        /// <summary>
        /// Prepares the level table for a frame: refreshes the sharpness limits when
        /// the sharpness changed, then computes the filter level of every segment,
        /// reference frame and mode slot.
        /// </summary>
        /// <param name="cm">Decoder state holding the filter parameters, segmentation and level table.</param>
        /// <param name="defaultFiltLvl">Frame filter level used as the base for every segment.</param>
        public static void LoopFilterFrameInit(ref Vp9Common cm, int defaultFiltLvl)
        {
            // scale is the multiplier applied to the deltas:
            // 1 when the filter level is between 0 and 31,
            // 2 when it is between 32 and 63.
            int scale = 1 << (defaultFiltLvl >> 5);

            ref LoopFilterInfoN levelInfo = ref cm.LfInfo;
            ref Types.LoopFilter filter = ref cm.Lf;
            ref Segmentation segmentation = ref cm.Seg;

            // The limits only depend on the sharpness, so they are rebuilt only when it changes.
            if (filter.LastSharpnessLevel != filter.SharpnessLevel)
            {
                UpdateSharpness(ref levelInfo, filter.SharpnessLevel);
                filter.LastSharpnessLevel = filter.SharpnessLevel;
            }

            for (int segId = 0; segId < Constants.MaxSegments; segId++)
            {
                int segLevel = defaultFiltLvl;

                if (segmentation.IsSegFeatureActive(segId, SegLvlFeatures.SegLvlAltLf) != 0)
                {
                    int segData = segmentation.GetSegData(segId, SegLvlFeatures.SegLvlAltLf);
                    int requested;

                    // The segment value either replaces the frame level or is added to it.
                    if (segmentation.AbsDelta == Constants.SegmentAbsData)
                    {
                        requested = segData;
                    }
                    else
                    {
                        requested = defaultFiltLvl + segData;
                    }

                    segLevel = Math.Clamp(requested, 0, MaxLoopFilter);
                }

                if (!filter.ModeRefDeltaEnabled)
                {
                    // We could get rid of this if we assume that deltas are set to
                    // zero when not in use; encoder always uses deltas.
                    Span<byte> segmentLevels = MemoryMarshal.Cast<Array2<byte>, byte>(levelInfo.Lvl[segId].AsSpan());
                    segmentLevels.Fill((byte)segLevel);

                    continue;
                }

                // Intra frame: only the reference delta applies, stored in mode slot 0.
                int intraLevel = segLevel + filter.RefDeltas[Constants.IntraFrame] * scale;
                levelInfo.Lvl[segId][Constants.IntraFrame][0] = (byte)Math.Clamp(intraLevel, 0, MaxLoopFilter);

                // Inter frames: reference delta plus mode delta, both scaled.
                for (int refFrame = Constants.LastFrame; refFrame < Constants.MaxRefFrames; ++refFrame)
                {
                    int refLevel = segLevel + filter.RefDeltas[refFrame] * scale;

                    for (int mode = 0; mode < MaxModeLfDeltas; ++mode)
                    {
                        int interLevel = refLevel + filter.ModeDeltas[mode] * scale;
                        levelInfo.Lvl[segId][refFrame][mode] = (byte)Math.Clamp(interLevel, 0, MaxLoopFilter);
                    }
                }
            }
        }
    }
}