/ src / ARMeilleure / Instructions / SoftFallback.cs
SoftFallback.cs
  1  using ARMeilleure.State;
  2  using System;
  3  
  4  namespace ARMeilleure.Instructions
  5  {
  6      static class SoftFallback
  7      {
  8          #region "ShrImm64"
  9          public static long SignedShrImm64(long value, long roundConst, int shift)
 10          {
 11              if (roundConst == 0L)
 12              {
 13                  if (shift <= 63)
 14                  {
 15                      return value >> shift;
 16                  }
 17                  else /* if (shift == 64) */
 18                  {
 19                      if (value < 0L)
 20                      {
 21                          return -1L;
 22                      }
 23                      else /* if (value >= 0L) */
 24                      {
 25                          return 0L;
 26                      }
 27                  }
 28              }
 29              else /* if (roundConst == 1L << (shift - 1)) */
 30              {
 31                  if (shift <= 63)
 32                  {
 33                      long add = value + roundConst;
 34  
 35                      if ((~value & (value ^ add)) < 0L)
 36                      {
 37                          return (long)((ulong)add >> shift);
 38                      }
 39                      else
 40                      {
 41                          return add >> shift;
 42                      }
 43                  }
 44                  else /* if (shift == 64) */
 45                  {
 46                      return 0L;
 47                  }
 48              }
 49          }
 50  
 51          public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift)
 52          {
 53              if (roundConst == 0L)
 54              {
 55                  if (shift <= 63)
 56                  {
 57                      return value >> shift;
 58                  }
 59                  else /* if (shift == 64) */
 60                  {
 61                      return 0UL;
 62                  }
 63              }
 64              else /* if (roundConst == 1L << (shift - 1)) */
 65              {
 66                  ulong add = value + (ulong)roundConst;
 67  
 68                  if ((add < value) && (add < (ulong)roundConst))
 69                  {
 70                      if (shift <= 63)
 71                      {
 72                          return (add >> shift) | (0x8000000000000000UL >> (shift - 1));
 73                      }
 74                      else /* if (shift == 64) */
 75                      {
 76                          return 1UL;
 77                      }
 78                  }
 79                  else
 80                  {
 81                      if (shift <= 63)
 82                      {
 83                          return add >> shift;
 84                      }
 85                      else /* if (shift == 64) */
 86                      {
 87                          return 0UL;
 88                      }
 89                  }
 90              }
 91          }
 92          #endregion
 93  
 94          #region "Saturation"
 95          public static int SatF32ToS32(float value)
 96          {
 97              if (float.IsNaN(value))
 98              {
 99                  return 0;
100              }
101  
102              return value >= int.MaxValue ? int.MaxValue :
103                     value <= int.MinValue ? int.MinValue : (int)value;
104          }
105  
106          public static long SatF32ToS64(float value)
107          {
108              if (float.IsNaN(value))
109              {
110                  return 0;
111              }
112  
113              return value >= long.MaxValue ? long.MaxValue :
114                     value <= long.MinValue ? long.MinValue : (long)value;
115          }
116  
117          public static uint SatF32ToU32(float value)
118          {
119              if (float.IsNaN(value))
120              {
121                  return 0;
122              }
123  
124              return value >= uint.MaxValue ? uint.MaxValue :
125                     value <= uint.MinValue ? uint.MinValue : (uint)value;
126          }
127  
128          public static ulong SatF32ToU64(float value)
129          {
130              if (float.IsNaN(value))
131              {
132                  return 0;
133              }
134  
135              return value >= ulong.MaxValue ? ulong.MaxValue :
136                     value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
137          }
138  
139          public static int SatF64ToS32(double value)
140          {
141              if (double.IsNaN(value))
142              {
143                  return 0;
144              }
145  
146              return value >= int.MaxValue ? int.MaxValue :
147                     value <= int.MinValue ? int.MinValue : (int)value;
148          }
149  
150          public static long SatF64ToS64(double value)
151          {
152              if (double.IsNaN(value))
153              {
154                  return 0;
155              }
156  
157              return value >= long.MaxValue ? long.MaxValue :
158                     value <= long.MinValue ? long.MinValue : (long)value;
159          }
160  
161          public static uint SatF64ToU32(double value)
162          {
163              if (double.IsNaN(value))
164              {
165                  return 0;
166              }
167  
168              return value >= uint.MaxValue ? uint.MaxValue :
169                     value <= uint.MinValue ? uint.MinValue : (uint)value;
170          }
171  
172          public static ulong SatF64ToU64(double value)
173          {
174              if (double.IsNaN(value))
175              {
176                  return 0;
177              }
178  
179              return value >= ulong.MaxValue ? ulong.MaxValue :
180                     value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
181          }
182          #endregion
183  
184          #region "Count"
185          public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
186          {
187              value ^= value >> 1;
188  
189              int highBit = size - 2;
190  
191              for (int bit = highBit; bit >= 0; bit--)
192              {
193                  if (((int)(value >> bit) & 0b1) != 0)
194                  {
195                      return (ulong)(highBit - bit);
196                  }
197              }
198  
199              return (ulong)(size - 1);
200          }
201  
202          private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
203  
204          public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
205          {
206              if (value == 0ul)
207              {
208                  return (ulong)size;
209              }
210  
211              int nibbleIdx = size;
212              int preCount, count = 0;
213  
214              do
215              {
216                  nibbleIdx -= 4;
217                  preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111];
218                  count += preCount;
219              }
220              while (preCount == 4);
221  
222              return (ulong)count;
223          }
224          #endregion
225  
226          #region "Table"
227          public static V128 Tbl1(V128 vector, int bytes, V128 tb0)
228          {
229              return TblOrTbx(default, vector, bytes, tb0);
230          }
231  
232          public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1)
233          {
234              return TblOrTbx(default, vector, bytes, tb0, tb1);
235          }
236  
237          public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
238          {
239              return TblOrTbx(default, vector, bytes, tb0, tb1, tb2);
240          }
241  
242          public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
243          {
244              return TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3);
245          }
246  
247          public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0)
248          {
249              return TblOrTbx(dest, vector, bytes, tb0);
250          }
251  
252          public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1)
253          {
254              return TblOrTbx(dest, vector, bytes, tb0, tb1);
255          }
256  
257          public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2)
258          {
259              return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2);
260          }
261  
262          public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3)
263          {
264              return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3);
265          }
266  
267          private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb)
268          {
269              byte[] res = new byte[16];
270  
271              if (dest != default)
272              {
273                  Buffer.BlockCopy(dest.ToArray(), 0, res, 0, bytes);
274              }
275  
276              byte[] table = new byte[tb.Length * 16];
277  
278              for (byte index = 0; index < tb.Length; index++)
279              {
280                  Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16);
281              }
282  
283              byte[] v = vector.ToArray();
284  
285              for (byte index = 0; index < bytes; index++)
286              {
287                  byte tblIndex = v[index];
288  
289                  if (tblIndex < table.Length)
290                  {
291                      res[index] = table[tblIndex];
292                  }
293              }
294  
295              return new V128(res);
296          }
297          #endregion
298  
299          #region "Crc32"
300          private const uint Crc32RevPoly = 0xedb88320;
301          private const uint Crc32cRevPoly = 0x82f63b78;
302  
303          public static uint Crc32b(uint crc, byte value) => Crc32(crc, Crc32RevPoly, value);
304          public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value);
305          public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value);
306          public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value);
307  
308          public static uint Crc32cb(uint crc, byte value) => Crc32(crc, Crc32cRevPoly, value);
309          public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value);
310          public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value);
311          public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value);
312  
313          private static uint Crc32h(uint crc, uint poly, ushort val)
314          {
315              crc = Crc32(crc, poly, (byte)(val >> 0));
316              crc = Crc32(crc, poly, (byte)(val >> 8));
317  
318              return crc;
319          }
320  
321          private static uint Crc32w(uint crc, uint poly, uint val)
322          {
323              crc = Crc32(crc, poly, (byte)(val >> 0));
324              crc = Crc32(crc, poly, (byte)(val >> 8));
325              crc = Crc32(crc, poly, (byte)(val >> 16));
326              crc = Crc32(crc, poly, (byte)(val >> 24));
327  
328              return crc;
329          }
330  
331          private static uint Crc32x(uint crc, uint poly, ulong val)
332          {
333              crc = Crc32(crc, poly, (byte)(val >> 0));
334              crc = Crc32(crc, poly, (byte)(val >> 8));
335              crc = Crc32(crc, poly, (byte)(val >> 16));
336              crc = Crc32(crc, poly, (byte)(val >> 24));
337              crc = Crc32(crc, poly, (byte)(val >> 32));
338              crc = Crc32(crc, poly, (byte)(val >> 40));
339              crc = Crc32(crc, poly, (byte)(val >> 48));
340              crc = Crc32(crc, poly, (byte)(val >> 56));
341  
342              return crc;
343          }
344  
345          private static uint Crc32(uint crc, uint poly, byte val)
346          {
347              crc ^= val;
348  
349              for (int bit = 7; bit >= 0; bit--)
350              {
351                  uint mask = (uint)(-(int)(crc & 1));
352  
353                  crc = (crc >> 1) ^ (poly & mask);
354              }
355  
356              return crc;
357          }
358          #endregion
359  
360          #region "Aes"
361          public static V128 Decrypt(V128 value, V128 roundKey)
362          {
363              return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey));
364          }
365  
366          public static V128 Encrypt(V128 value, V128 roundKey)
367          {
368              return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey));
369          }
370  
371          public static V128 InverseMixColumns(V128 value)
372          {
373              return CryptoHelper.AesInvMixColumns(value);
374          }
375  
376          public static V128 MixColumns(V128 value)
377          {
378              return CryptoHelper.AesMixColumns(value);
379          }
380          #endregion
381  
382          #region "Sha1"
383          public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk)
384          {
385              for (int e = 0; e <= 3; e++)
386              {
387                  uint t = ShaChoose(hash_abcd.Extract<uint>(1),
388                                     hash_abcd.Extract<uint>(2),
389                                     hash_abcd.Extract<uint>(3));
390  
391                  hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
392  
393                  t = Rol(hash_abcd.Extract<uint>(1), 30);
394  
395                  hash_abcd.Insert(1, t);
396  
397                  Rol32_160(ref hash_e, ref hash_abcd);
398              }
399  
400              return hash_abcd;
401          }
402  
403          public static uint FixedRotate(uint hash_e)
404          {
405              return hash_e.Rol(30);
406          }
407  
408          public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk)
409          {
410              for (int e = 0; e <= 3; e++)
411              {
412                  uint t = ShaMajority(hash_abcd.Extract<uint>(1),
413                                       hash_abcd.Extract<uint>(2),
414                                       hash_abcd.Extract<uint>(3));
415  
416                  hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
417  
418                  t = Rol(hash_abcd.Extract<uint>(1), 30);
419  
420                  hash_abcd.Insert(1, t);
421  
422                  Rol32_160(ref hash_e, ref hash_abcd);
423              }
424  
425              return hash_abcd;
426          }
427  
428          public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk)
429          {
430              for (int e = 0; e <= 3; e++)
431              {
432                  uint t = ShaParity(hash_abcd.Extract<uint>(1),
433                                     hash_abcd.Extract<uint>(2),
434                                     hash_abcd.Extract<uint>(3));
435  
436                  hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e);
437  
438                  t = Rol(hash_abcd.Extract<uint>(1), 30);
439  
440                  hash_abcd.Insert(1, t);
441  
442                  Rol32_160(ref hash_e, ref hash_abcd);
443              }
444  
445              return hash_abcd;
446          }
447  
448          public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11)
449          {
450              ulong t2 = w4_7.Extract<ulong>(0);
451              ulong t1 = w0_3.Extract<ulong>(1);
452  
453              V128 result = new(t1, t2);
454  
455              return result ^ (w0_3 ^ w8_11);
456          }
457  
458          public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15)
459          {
460              V128 t = tw0_3 ^ (w12_15 >> 32);
461  
462              uint tE0 = t.Extract<uint>(0);
463              uint tE1 = t.Extract<uint>(1);
464              uint tE2 = t.Extract<uint>(2);
465              uint tE3 = t.Extract<uint>(3);
466  
467              return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2));
468          }
469  
470          private static void Rol32_160(ref uint y, ref V128 x)
471          {
472              uint xE3 = x.Extract<uint>(3);
473  
474              x <<= 32;
475              x.Insert(0, y);
476  
477              y = xE3;
478          }
479  
480          private static uint ShaChoose(uint x, uint y, uint z)
481          {
482              return ((y ^ z) & x) ^ z;
483          }
484  
485          private static uint ShaMajority(uint x, uint y, uint z)
486          {
487              return (x & y) | ((x | y) & z);
488          }
489  
490          private static uint ShaParity(uint x, uint y, uint z)
491          {
492              return x ^ y ^ z;
493          }
494  
495          private static uint Rol(this uint value, int count)
496          {
497              return (value << count) | (value >> (32 - count));
498          }
499          #endregion
500  
501          #region "Sha256"
502          public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk)
503          {
504              return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
505          }
506  
507          public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
508          {
509              return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
510          }
511  
512          public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7)
513          {
514              V128 result = new();
515  
516              for (int e = 0; e <= 3; e++)
517              {
518                  uint elt = (e <= 2 ? w0_3 : w4_7).Extract<uint>(e <= 2 ? e + 1 : 0);
519  
520                  elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3);
521  
522                  elt += w0_3.Extract<uint>(e);
523  
524                  result.Insert(e, elt);
525              }
526  
527              return result;
528          }
529  
530          public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15)
531          {
532              V128 result = new();
533  
534              ulong t1 = w12_15.Extract<ulong>(1);
535  
536              for (int e = 0; e <= 1; e++)
537              {
538                  uint elt = t1.ULongPart(e);
539  
540                  elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
541  
542                  elt += w0_3.Extract<uint>(e) + w8_11.Extract<uint>(e + 1);
543  
544                  result.Insert(e, elt);
545              }
546  
547              t1 = result.Extract<ulong>(0);
548  
549              for (int e = 2; e <= 3; e++)
550              {
551                  uint elt = t1.ULongPart(e - 2);
552  
553                  elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10);
554  
555                  elt += w0_3.Extract<uint>(e) + (e == 2 ? w8_11 : w12_15).Extract<uint>(e == 2 ? 3 : 0);
556  
557                  result.Insert(e, elt);
558              }
559  
560              return result;
561          }
562  
563          private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1)
564          {
565              for (int e = 0; e <= 3; e++)
566              {
567                  uint chs = ShaChoose(y.Extract<uint>(0),
568                                       y.Extract<uint>(1),
569                                       y.Extract<uint>(2));
570  
571                  uint maj = ShaMajority(x.Extract<uint>(0),
572                                         x.Extract<uint>(1),
573                                         x.Extract<uint>(2));
574  
575                  uint t1 = y.Extract<uint>(3) + ShaHashSigma1(y.Extract<uint>(0)) + chs + w.Extract<uint>(e);
576  
577                  uint t2 = t1 + x.Extract<uint>(3);
578  
579                  x.Insert(3, t2);
580  
581                  t2 = t1 + ShaHashSigma0(x.Extract<uint>(0)) + maj;
582  
583                  y.Insert(3, t2);
584  
585                  Rol32_256(ref y, ref x);
586              }
587  
588              return part1 ? x : y;
589          }
590  
591          private static void Rol32_256(ref V128 y, ref V128 x)
592          {
593              uint yE3 = y.Extract<uint>(3);
594              uint xE3 = x.Extract<uint>(3);
595  
596              y <<= 32;
597              x <<= 32;
598  
599              y.Insert(0, xE3);
600              x.Insert(0, yE3);
601          }
602  
603          private static uint ShaHashSigma0(uint x)
604          {
605              return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22);
606          }
607  
608          private static uint ShaHashSigma1(uint x)
609          {
610              return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25);
611          }
612  
613          private static uint Ror(this uint value, int count)
614          {
615              return (value >> count) | (value << (32 - count));
616          }
617  
618          private static uint Lsr(this uint value, int count)
619          {
620              return value >> count;
621          }
622  
623          private static uint ULongPart(this ulong value, int part)
624          {
625              return part == 0
626                  ? (uint)(value & 0xFFFFFFFFUL)
627                  : (uint)(value >> 32);
628          }
629          #endregion
630  
631          public static V128 PolynomialMult64_128(ulong op1, ulong op2)
632          {
633              V128 result = V128.Zero;
634  
635              V128 op2_128 = new(op2, 0);
636  
637              for (int i = 0; i < 64; i++)
638              {
639                  if (((op1 >> i) & 1) == 1)
640                  {
641                      result ^= op2_128 << i;
642                  }
643              }
644  
645              return result;
646          }
647      }
648  }