/ src / ARMeilleure / Instructions / InstEmitHashHelper.cs
InstEmitHashHelper.cs
  1  // https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
  2  
  3  using ARMeilleure.IntermediateRepresentation;
  4  using ARMeilleure.Translation;
  5  using System;
  6  using System.Diagnostics;
  7  using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8  using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  9  
 10  namespace ARMeilleure.Instructions
 11  {
 12      static class InstEmitHashHelper
 13      {
 14          public const uint Crc32RevPoly = 0xedb88320;
 15          public const uint Crc32cRevPoly = 0x82f63b78;
 16  
 17          public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
 18          {
 19              Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
 20              Debug.Assert(size >= 0 && size < 4);
 21              Debug.Assert((size < 3) || (value.Type == OperandType.I64));
 22  
 23              if (castagnoli && Optimizations.UseSse42)
 24              {
 25                  // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
 26                  value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value;
 27                  crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc;
 28  
 29                  Intrinsic op = size switch
 30                  {
 31                      0 => Intrinsic.X86Crc32_8,
 32                      1 => Intrinsic.X86Crc32_16,
 33                      _ => Intrinsic.X86Crc32,
 34                  };
 35  
 36                  return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value);
 37              }
 38              else if (Optimizations.UsePclmulqdq)
 39              {
 40                  return size switch
 41                  {
 42                      3 => EmitCrc32Optimized64(context, crc, value, castagnoli),
 43                      _ => EmitCrc32Optimized(context, crc, value, castagnoli, size),
 44                  };
 45              }
 46              else
 47              {
 48                  string name = (size, castagnoli) switch
 49                  {
 50                      (0, false) => nameof(SoftFallback.Crc32b),
 51                      (1, false) => nameof(SoftFallback.Crc32h),
 52                      (2, false) => nameof(SoftFallback.Crc32w),
 53                      (3, false) => nameof(SoftFallback.Crc32x),
 54                      (0, true) => nameof(SoftFallback.Crc32cb),
 55                      (1, true) => nameof(SoftFallback.Crc32ch),
 56                      (2, true) => nameof(SoftFallback.Crc32cw),
 57                      (3, true) => nameof(SoftFallback.Crc32cx),
 58                      _ => throw new ArgumentOutOfRangeException(nameof(size)),
 59                  };
 60  
 61                  return context.Call(typeof(SoftFallback).GetMethod(name), crc, value);
 62              }
 63          }
 64  
 65          private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
 66          {
 67              long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
 68              long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
 69  
 70              crc = context.VectorInsert(context.VectorZero(), crc, 0);
 71  
 72              switch (size)
 73              {
 74                  case 0:
 75                      data = context.VectorInsert8(context.VectorZero(), data, 0);
 76                      break;
 77                  case 1:
 78                      data = context.VectorInsert16(context.VectorZero(), data, 0);
 79                      break;
 80                  case 2:
 81                      data = context.VectorInsert(context.VectorZero(), data, 0);
 82                      break;
 83              }
 84  
 85              int bitsize = 8 << size;
 86  
 87              Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
 88              tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize));
 89              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0));
 90              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
 91  
 92              if (bitsize < 32)
 93              {
 94                  crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8));
 95                  tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc);
 96              }
 97  
 98              return context.VectorExtract(OperandType.I32, tmp, 2);
 99          }
100  
101          private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
102          {
103              long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
104              long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1
105  
106              crc = context.VectorInsert(context.VectorZero(), crc, 0);
107              data = context.VectorInsert(context.VectorZero(), data, 0);
108  
109              Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
110              Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));
111  
112              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
113              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
114  
115              tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
116              tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));
117  
118              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
119              tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));
120  
121              return context.VectorExtract(OperandType.I32, tmp, 2);
122          }
123      }
124  }