InstEmitHashHelper.cs
// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf

using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    /// <summary>
    /// Emits IR for CRC32 / CRC32C updates, choosing between the x86 CRC32 intrinsics,
    /// a PCLMULQDQ-based sequence, and a managed <see cref="SoftFallback"/> call.
    /// </summary>
    static class InstEmitHashHelper
    {
        public const uint Crc32RevPoly = 0xedb88320;
        public const uint Crc32cRevPoly = 0x82f63b78;

        // mu' = floor(x^64/P(x))'
        private const long Crc32Mu = 0x1F7011641;
        private const long Crc32cMu = 0x0DEA713F1;

        // P'(x) << 1
        private const long Crc32PolyShifted = 0x1DB710641;
        private const long Crc32cPolyShifted = 0x105EC76F0;

        /// <summary>
        /// Emits the code that folds <paramref name="value"/> into <paramref name="crc"/>.
        /// </summary>
        /// <param name="context">Emitter context receiving the generated operations.</param>
        /// <param name="crc">Integer operand holding the running checksum.</param>
        /// <param name="value">Integer operand holding the data; must be I64 when <paramref name="size"/> is 3.</param>
        /// <param name="size">Data width selector in the range 0..3 (8 &lt;&lt; size bits).</param>
        /// <param name="castagnoli">True for the CRC32C variants, false for the plain CRC32 variants.</param>
        /// <returns>Operand holding the updated checksum.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown on the software fallback path when <paramref name="size"/> is outside 0..3.
        /// </exception>
        public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
        {
            Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
            Debug.Assert(size >= 0 && size < 4);
            Debug.Assert((size < 3) || (value.Type == OperandType.I64));

            if (castagnoli && Optimizations.UseSse42)
            {
                // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
                if (value.Kind == OperandKind.Constant)
                {
                    value = context.Copy(value);
                }

                if (crc.Kind == OperandKind.Constant)
                {
                    crc = context.Copy(crc);
                }

                Intrinsic crcIntrinsic;

                switch (size)
                {
                    case 0:
                        crcIntrinsic = Intrinsic.X86Crc32_8;
                        break;
                    case 1:
                        crcIntrinsic = Intrinsic.X86Crc32_16;
                        break;
                    default:
                        crcIntrinsic = Intrinsic.X86Crc32;
                        break;
                }

                if (size == 3)
                {
                    // The 64-bit form yields an I64 result; narrow it back to 32 bits.
                    Operand wideResult = context.AddIntrinsicLong(crcIntrinsic, crc, value);

                    return context.ConvertI64ToI32(wideResult);
                }

                return context.AddIntrinsicInt(crcIntrinsic, crc, value);
            }

            if (Optimizations.UsePclmulqdq)
            {
                if (size == 3)
                {
                    return EmitCrc32Optimized64(context, crc, value, castagnoli);
                }

                return EmitCrc32Optimized(context, crc, value, castagnoli, size);
            }

            // No hardware assistance available: call the managed implementation.
            string fallbackName = size switch
            {
                0 => castagnoli ? nameof(SoftFallback.Crc32cb) : nameof(SoftFallback.Crc32b),
                1 => castagnoli ? nameof(SoftFallback.Crc32ch) : nameof(SoftFallback.Crc32h),
                2 => castagnoli ? nameof(SoftFallback.Crc32cw) : nameof(SoftFallback.Crc32w),
                3 => castagnoli ? nameof(SoftFallback.Crc32cx) : nameof(SoftFallback.Crc32x),
                _ => throw new ArgumentOutOfRangeException(nameof(size)),
            };

            return context.Call(typeof(SoftFallback).GetMethod(fallbackName), crc, value);
        }

        /// <summary>
        /// Emits the PCLMULQDQ-based update for 8, 16 and 32-bit data (<paramref name="size"/> 0..2).
        /// </summary>
        /// <param name="context">Emitter context receiving the generated operations.</param>
        /// <param name="crc">Operand holding the running checksum.</param>
        /// <param name="data">Operand holding the data to fold in.</param>
        /// <param name="castagnoli">Selects the CRC32C constants instead of the CRC32 ones.</param>
        /// <param name="size">Data width selector; the data is 8 &lt;&lt; size bits wide.</param>
        /// <returns>I32 operand extracted from element 2 of the final vector.</returns>
        private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size)
        {
            long mu = castagnoli ? Crc32cMu : Crc32Mu;
            long polynomial = castagnoli ? Crc32cPolyShifted : Crc32PolyShifted;

            Operand crcVec = context.VectorInsert(context.VectorZero(), crc, 0);
            Operand dataVec = data;

            // Place the data in element 0 of an otherwise zeroed vector, using the insert that matches its width.
            if (size == 0)
            {
                dataVec = context.VectorInsert8(context.VectorZero(), data, 0);
            }
            else if (size == 1)
            {
                dataVec = context.VectorInsert16(context.VectorZero(), data, 0);
            }
            else if (size == 2)
            {
                dataVec = context.VectorInsert(context.VectorZero(), data, 0);
            }

            int bitCount = 8 << size;
            int alignShift = 64 - bitCount;

            Operand folded = context.AddIntrinsic(Intrinsic.X86Pxor, crcVec, dataVec);
            folded = context.AddIntrinsic(Intrinsic.X86Psllq, folded, Const(alignShift));
            folded = EmitClmulByMuThenPolynomial(context, folded, mu, polynomial, 0);

            if (bitCount < 32)
            {
                // For data narrower than 32 bits, the byte-shifted checksum is XORed back into the result.
                Operand shiftedCrc = context.AddIntrinsic(Intrinsic.X86Pslldq, crcVec, Const(alignShift / 8));
                folded = context.AddIntrinsic(Intrinsic.X86Pxor, folded, shiftedCrc);
            }

            return context.VectorExtract(OperandType.I32, folded, 2);
        }

        /// <summary>
        /// Emits the PCLMULQDQ-based update for 64-bit data, performed as two multiply rounds.
        /// </summary>
        /// <param name="context">Emitter context receiving the generated operations.</param>
        /// <param name="crc">Operand holding the running checksum.</param>
        /// <param name="data">Operand holding the 64-bit data to fold in.</param>
        /// <param name="castagnoli">Selects the CRC32C constants instead of the CRC32 ones.</param>
        /// <returns>I32 operand extracted from element 2 of the final vector.</returns>
        private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli)
        {
            long mu = castagnoli ? Crc32cMu : Crc32Mu;
            long polynomial = castagnoli ? Crc32cPolyShifted : Crc32PolyShifted;

            Operand crcVec = context.VectorInsert(context.VectorZero(), crc, 0);
            Operand dataVec = context.VectorInsert(context.VectorZero(), data, 0);

            Operand mixed = context.AddIntrinsic(Intrinsic.X86Pxor, crcVec, dataVec);
            Operand shifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mixed, Const(4));

            // First round: multiply, then merge with the shifted input and realign for the second round.
            Operand round = EmitClmulByMuThenPolynomial(context, shifted, mu, polynomial, 0);
            round = context.AddIntrinsic(Intrinsic.X86Pxor, round, shifted);
            round = context.AddIntrinsic(Intrinsic.X86Psllq, round, Const(32));

            // Second round: same multiply pair, but the first PCLMULQDQ uses selector 1.
            round = EmitClmulByMuThenPolynomial(context, round, mu, polynomial, 1);

            return context.VectorExtract(OperandType.I32, round, 2);
        }

        /// <summary>
        /// Emits two chained PCLMULQDQ operations: <paramref name="input"/> by <paramref name="mu"/>,
        /// then that product by <paramref name="polynomial"/>.
        /// </summary>
        /// <param name="context">Emitter context receiving the generated operations.</param>
        /// <param name="input">Vector operand fed to the first multiply.</param>
        /// <param name="mu">Constant loaded as the second source of the first multiply.</param>
        /// <param name="polynomial">Constant loaded as the second source of the second multiply.</param>
        /// <param name="muSelector">Immediate passed to the first PCLMULQDQ; the second always uses 0.</param>
        /// <returns>Vector operand produced by the second multiply.</returns>
        private static Operand EmitClmulByMuThenPolynomial(ArmEmitterContext context, Operand input, long mu, long polynomial, int muSelector)
        {
            Operand product = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, input, X86GetScalar(context, mu), Const(muSelector));

            return context.AddIntrinsic(Intrinsic.X86Pclmulqdq, product, X86GetScalar(context, polynomial), Const(0));
        }
    }
}