/ src / ARMeilleure / Instructions / InstEmitSimdHelper.cs
InstEmitSimdHelper.cs
   1  using ARMeilleure.CodeGen.X86;
   2  using ARMeilleure.Decoders;
   3  using ARMeilleure.IntermediateRepresentation;
   4  using ARMeilleure.State;
   5  using ARMeilleure.Translation;
   6  using System;
   7  using System.Diagnostics;
   8  using System.Reflection;
   9  using static ARMeilleure.Instructions.InstEmitHelper;
  10  using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  11  
  12  namespace ARMeilleure.Instructions
  13  {
  14      using Func1I = Func<Operand, Operand>;
  15      using Func2I = Func<Operand, Operand, Operand>;
  16      using Func3I = Func<Operand, Operand, Operand, Operand>;
  17  
  18      static class InstEmitSimdHelper
  19      {
  20          #region "Masks"
  21          public static readonly long[] EvenMasks = new long[]
  22          {
  23              14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
  24              13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
  25              11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0, // S
  26          };
  27  
  28          public static readonly long[] OddMasks = new long[]
  29          {
  30              15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
  31              15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
  32              15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0, // S
  33          };
  34  
  35          public const long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;
  36  
  37          public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
  38          {
  39              ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
  40                               (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);
  41  
  42              return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
  43          }
  44          #endregion
  45  
  46          #region "X86 SSE Intrinsics"
  47          public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
  48          {
  49              Intrinsic.X86Paddb,
  50              Intrinsic.X86Paddw,
  51              Intrinsic.X86Paddd,
  52              Intrinsic.X86Paddq,
  53          };
  54  
  55          public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
  56          {
  57              Intrinsic.X86Pcmpeqb,
  58              Intrinsic.X86Pcmpeqw,
  59              Intrinsic.X86Pcmpeqd,
  60              Intrinsic.X86Pcmpeqq,
  61          };
  62  
  63          public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
  64          {
  65              Intrinsic.X86Pcmpgtb,
  66              Intrinsic.X86Pcmpgtw,
  67              Intrinsic.X86Pcmpgtd,
  68              Intrinsic.X86Pcmpgtq,
  69          };
  70  
  71          public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
  72          {
  73              Intrinsic.X86Pmaxsb,
  74              Intrinsic.X86Pmaxsw,
  75              Intrinsic.X86Pmaxsd,
  76          };
  77  
  78          public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
  79          {
  80              Intrinsic.X86Pmaxub,
  81              Intrinsic.X86Pmaxuw,
  82              Intrinsic.X86Pmaxud,
  83          };
  84  
  85          public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
  86          {
  87              Intrinsic.X86Pminsb,
  88              Intrinsic.X86Pminsw,
  89              Intrinsic.X86Pminsd,
  90          };
  91  
  92          public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
  93          {
  94              Intrinsic.X86Pminub,
  95              Intrinsic.X86Pminuw,
  96              Intrinsic.X86Pminud,
  97          };
  98  
  99          public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
 100          {
 101              Intrinsic.X86Pmovsxbw,
 102              Intrinsic.X86Pmovsxwd,
 103              Intrinsic.X86Pmovsxdq,
 104          };
 105  
 106          public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
 107          {
 108              Intrinsic.X86Pmovzxbw,
 109              Intrinsic.X86Pmovzxwd,
 110              Intrinsic.X86Pmovzxdq,
 111          };
 112  
 113          public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
 114          {
 115              0,
 116              Intrinsic.X86Psllw,
 117              Intrinsic.X86Pslld,
 118              Intrinsic.X86Psllq,
 119          };
 120  
 121          public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
 122          {
 123              0,
 124              Intrinsic.X86Psraw,
 125              Intrinsic.X86Psrad,
 126          };
 127  
 128          public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
 129          {
 130              0,
 131              Intrinsic.X86Psrlw,
 132              Intrinsic.X86Psrld,
 133              Intrinsic.X86Psrlq,
 134          };
 135  
 136          public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
 137          {
 138              Intrinsic.X86Psubb,
 139              Intrinsic.X86Psubw,
 140              Intrinsic.X86Psubd,
 141              Intrinsic.X86Psubq,
 142          };
 143  
 144          public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
 145          {
 146              Intrinsic.X86Punpckhbw,
 147              Intrinsic.X86Punpckhwd,
 148              Intrinsic.X86Punpckhdq,
 149              Intrinsic.X86Punpckhqdq,
 150          };
 151  
 152          public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
 153          {
 154              Intrinsic.X86Punpcklbw,
 155              Intrinsic.X86Punpcklwd,
 156              Intrinsic.X86Punpckldq,
 157              Intrinsic.X86Punpcklqdq,
 158          };
 159          #endregion
 160  
 161          public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
 162          {
 163              if (Optimizations.UseSse2)
 164              {
 165                  Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
 166  
 167                  Operand fzTrue = getFpFlag(FPState.FzFlag);
 168                  Operand r0True = getFpFlag(FPState.RMode0Flag);
 169                  Operand r1True = getFpFlag(FPState.RMode1Flag);
 170  
 171                  mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
 172  
 173                  mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
 174  
 175                  // X86 round modes in order: nearest, negative, positive, zero
 176                  // ARM round modes in order: nearest, positive, negative, zero
 177                  // Read the bits backwards to correct this.
 178  
 179                  mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
 180                  mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
 181  
 182                  context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
 183              }
 184              else if (Optimizations.UseAdvSimd)
 185              {
 186                  Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
 187  
 188                  Operand fzTrue = getFpFlag(FPState.FzFlag);
 189                  Operand r0True = getFpFlag(FPState.RMode0Flag);
 190                  Operand r1True = getFpFlag(FPState.RMode1Flag);
 191  
 192                  fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
 193  
 194                  fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
 195                  fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
 196                  fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
 197  
 198                  context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
 199  
 200                  // TODO: Restore FPSR
 201              }
 202          }
 203  
 204          public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
 205          {
 206              if (Optimizations.UseSse2)
 207              {
 208                  Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
 209  
 210                  // Unset round mode (to nearest) and ftz.
 211                  mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
 212  
 213                  context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
 214  
 215                  // Status flags would be stored here if they were used.
 216              }
 217              else if (Optimizations.UseAdvSimd)
 218              {
 219                  Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
 220  
 221                  // Unset round mode (to nearest) and fz.
 222                  fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
 223  
 224                  context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
 225  
 226                  // TODO: Store FPSR
 227              }
 228          }
 229  
 230          public static int GetImmShl(OpCodeSimdShImm op)
 231          {
 232              return op.Imm - (8 << op.Size);
 233          }
 234  
 235          public static int GetImmShr(OpCodeSimdShImm op)
 236          {
 237              return (8 << (op.Size + 1)) - op.Imm;
 238          }
 239  
 240          public static Operand X86GetScalar(ArmEmitterContext context, float value)
 241          {
 242              return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
 243          }
 244  
 245          public static Operand X86GetScalar(ArmEmitterContext context, double value)
 246          {
 247              return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
 248          }
 249  
 250          public static Operand X86GetScalar(ArmEmitterContext context, int value)
 251          {
 252              return context.VectorCreateScalar(Const(value));
 253          }
 254  
 255          public static Operand X86GetScalar(ArmEmitterContext context, long value)
 256          {
 257              return context.VectorCreateScalar(Const(value));
 258          }
 259  
 260          public static Operand X86GetAllElements(ArmEmitterContext context, float value)
 261          {
 262              return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
 263          }
 264  
 265          public static Operand X86GetAllElements(ArmEmitterContext context, double value)
 266          {
 267              return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
 268          }
 269  
 270          public static Operand X86GetAllElements(ArmEmitterContext context, short value)
 271          {
 272              ulong value1 = (ushort)value;
 273              ulong value2 = value1 << 16 | value1;
 274              ulong value4 = value2 << 32 | value2;
 275  
 276              return X86GetAllElements(context, (long)value4);
 277          }
 278  
 279          public static Operand X86GetAllElements(ArmEmitterContext context, int value)
 280          {
 281              Operand vector = context.VectorCreateScalar(Const(value));
 282  
 283              vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));
 284  
 285              return vector;
 286          }
 287  
 288          public static Operand X86GetAllElements(ArmEmitterContext context, long value)
 289          {
 290              Operand vector = context.VectorCreateScalar(Const(value));
 291  
 292              vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);
 293  
 294              return vector;
 295          }
 296  
 297          public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
 298          {
 299              return X86GetElements(context, (ulong)e1, (ulong)e0);
 300          }
 301  
 302          public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
 303          {
 304              Operand vector0 = context.VectorCreateScalar(Const(e0));
 305              Operand vector1 = context.VectorCreateScalar(Const(e1));
 306  
 307              return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
 308          }
 309  
 310          public static int X86GetRoundControl(FPRoundingMode roundMode)
 311          {
 312              return roundMode switch
 313              {
 314  #pragma warning disable IDE0055 // Disable formatting
 315                  FPRoundingMode.ToNearest            => 8 | 0, // even
 316                  FPRoundingMode.TowardsPlusInfinity  => 8 | 2,
 317                  FPRoundingMode.TowardsMinusInfinity => 8 | 1,
 318                  FPRoundingMode.TowardsZero          => 8 | 3,
 319                  _ => throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."),
 320  #pragma warning restore IDE0055
 321              };
 322          }
 323  
 324          public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
 325          {
 326              Debug.Assert(n.Type == OperandType.V128);
 327  
 328              Operand nCopy = context.Copy(n);
 329  
 330              Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));
 331  
 332              IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
 333  
 334              if ((op.Size & 1) == 0)
 335              {
 336                  Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
 337                  signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
 338  
 339                  // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
 340                  Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
 341                  valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
 342  
 343                  nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);
 344  
 345                  nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
 346              }
 347              else
 348              {
 349                  Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
 350                  signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);
 351  
 352                  // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
 353                  Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
 354                  valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);
 355  
 356                  nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);
 357  
 358                  nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
 359              }
 360  
 361              return nCopy;
 362          }
 363  
 364          public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
 365          {
 366              Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);
 367  
 368              Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));
 369  
 370              Operand c1 = Const(op.Type, 0x33L);
 371              Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));
 372  
 373              return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
 374          }
 375  
 376          public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
 377          {
 378              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 379  
 380              Operand n = GetVec(op.Rn);
 381  
 382              Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
 383  
 384              Operand res = context.AddIntrinsic(inst, n);
 385  
 386              if ((op.Size & 1) != 0)
 387              {
 388                  res = context.VectorZeroUpper64(res);
 389              }
 390              else
 391              {
 392                  res = context.VectorZeroUpper96(res);
 393              }
 394  
 395              context.Copy(GetVec(op.Rd), res);
 396          }
 397  
 398          public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
 399          {
 400              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 401  
 402              Operand n = GetVec(op.Rn);
 403              Operand m = GetVec(op.Rm);
 404  
 405              Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
 406  
 407              Operand res = context.AddIntrinsic(inst, n, m);
 408  
 409              if ((op.Size & 1) != 0)
 410              {
 411                  res = context.VectorZeroUpper64(res);
 412              }
 413              else
 414              {
 415                  res = context.VectorZeroUpper96(res);
 416              }
 417  
 418              context.Copy(GetVec(op.Rd), res);
 419          }
 420  
 421          public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
 422          {
 423              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 424  
 425              Operand n = GetVec(op.Rn);
 426  
 427              Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
 428  
 429              Operand res = context.AddIntrinsic(inst, n);
 430  
 431              if (op.RegisterSize == RegisterSize.Simd64)
 432              {
 433                  res = context.VectorZeroUpper64(res);
 434              }
 435  
 436              context.Copy(GetVec(op.Rd), res);
 437          }
 438  
 439          public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
 440          {
 441              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 442  
 443              Operand n = GetVec(op.Rn);
 444              Operand m = GetVec(op.Rm);
 445  
 446              Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;
 447  
 448              Operand res = context.AddIntrinsic(inst, n, m);
 449  
 450              if (op.RegisterSize == RegisterSize.Simd64)
 451              {
 452                  res = context.VectorZeroUpper64(res);
 453              }
 454  
 455              context.Copy(GetVec(op.Rd), res);
 456          }
 457  
 458          public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
 459          {
 460              IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
 461  
 462              MethodInfo info = (op.Size & 1) == 0
 463                  ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
 464                  : typeof(Math).GetMethod(name, new Type[] { typeof(double) });
 465  
 466              return context.Call(info, n);
 467          }
 468  
 469          public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
 470          {
 471              IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
 472  
 473              string name = nameof(Math.Round);
 474  
 475              MethodInfo info = (op.Size & 1) == 0
 476                  ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
 477                  : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });
 478  
 479              return context.Call(info, n, Const((int)roundMode));
 480          }
 481  
 482          public static Operand EmitGetRoundingMode(ArmEmitterContext context)
 483          {
 484              Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
 485              rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));
 486  
 487              return rMode;
 488          }
 489  
 490          public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
 491          {
 492              Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);
 493  
 494              Operand lbl1 = Label();
 495              Operand lbl2 = Label();
 496              Operand lbl3 = Label();
 497              Operand lblEnd = Label();
 498  
 499              Operand rN = Const((int)FPRoundingMode.ToNearest);
 500              Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
 501              Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);
 502  
 503              Operand res = context.AllocateLocal(op.Type);
 504  
 505              Operand rMode = EmitGetRoundingMode(context);
 506  
 507              context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
 508              context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
 509              context.Branch(lblEnd);
 510  
 511              context.MarkLabel(lbl1);
 512              context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
 513              context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
 514              context.Branch(lblEnd);
 515  
 516              context.MarkLabel(lbl2);
 517              context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
 518              context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
 519              context.Branch(lblEnd);
 520  
 521              context.MarkLabel(lbl3);
 522              context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
 523              context.Branch(lblEnd);
 524  
 525              context.MarkLabel(lblEnd);
 526  
 527              return res;
 528          }
 529  
 530          public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
 531          {
 532              IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
 533  
 534              MethodInfo info = (op.Size & 1) == 0
 535                  ? typeof(SoftFloat32).GetMethod(name)
 536                  : typeof(SoftFloat64).GetMethod(name);
 537  
 538              context.ExitArmFpMode();
 539              context.StoreToContext();
 540              Operand res = context.Call(info, callArgs);
 541              context.LoadFromContext();
 542              context.EnterArmFpMode();
 543  
 544              return res;
 545          }
 546  
 547          public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
 548          {
 549              OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 550  
 551              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
 552  
 553              Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
 554              Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
 555  
 556              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
 557          }
 558  
 559          public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
 560          {
 561              OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 562  
 563              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
 564  
 565              Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
 566              Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
 567              Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);
 568  
 569              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
 570          }
 571  
 572          public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
 573          {
 574              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 575  
 576              Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
 577  
 578              Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
 579  
 580              context.Copy(GetVec(op.Rd), d);
 581          }
 582  
 583          public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
 584          {
 585              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 586  
 587              Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
 588              Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);
 589  
 590              Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
 591  
 592              context.Copy(GetVec(op.Rd), d);
 593          }
 594  
 595          public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
 596          {
 597              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 598  
 599              Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
 600  
 601              Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);
 602  
 603              context.Copy(GetVec(op.Rd), d);
 604          }
 605  
 606          public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
 607          {
 608              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 609  
 610              Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
 611              Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
 612  
 613              Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);
 614  
 615              context.Copy(GetVec(op.Rd), d);
 616          }
 617  
 618          public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
 619          {
 620              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 621  
 622              Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
 623              Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
 624              Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);
 625  
 626              d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);
 627  
 628              context.Copy(GetVec(op.Rd), d);
 629          }
 630  
 631          public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
 632          {
 633              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 634  
 635              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
 636  
 637              Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
 638  
 639              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
 640          }
 641  
 642          public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
 643          {
 644              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 645  
 646              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
 647  
 648              Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
 649              Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
 650  
 651              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
 652          }
 653  
 654          public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
 655          {
 656              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 657  
 658              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
 659  
 660              Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
 661              Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
 662              Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);
 663  
 664              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
 665          }
 666  
 667          public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
 668          {
 669              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 670  
 671              Operand res = context.VectorZero();
 672  
 673              int sizeF = op.Size & 1;
 674  
 675              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
 676  
 677              int elems = op.GetBytesCount() >> sizeF + 2;
 678  
 679              for (int index = 0; index < elems; index++)
 680              {
 681                  Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 682  
 683                  res = context.VectorInsert(res, emit(ne), index);
 684              }
 685  
 686              context.Copy(GetVec(op.Rd), res);
 687          }
 688  
 689          public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
 690          {
 691              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 692  
 693              Operand res = context.VectorZero();
 694  
 695              int sizeF = op.Size & 1;
 696  
 697              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
 698  
 699              int elems = op.GetBytesCount() >> sizeF + 2;
 700  
 701              for (int index = 0; index < elems; index++)
 702              {
 703                  Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 704                  Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
 705  
 706                  res = context.VectorInsert(res, emit(ne, me), index);
 707              }
 708  
 709              context.Copy(GetVec(op.Rd), res);
 710          }
 711  
 712          public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
 713          {
 714              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 715  
 716              Operand res = context.VectorZero();
 717  
 718              int sizeF = op.Size & 1;
 719  
 720              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
 721  
 722              int elems = op.GetBytesCount() >> sizeF + 2;
 723  
 724              for (int index = 0; index < elems; index++)
 725              {
 726                  Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
 727                  Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 728                  Operand me = context.VectorExtract(type, GetVec(op.Rm), index);
 729  
 730                  res = context.VectorInsert(res, emit(de, ne, me), index);
 731              }
 732  
 733              context.Copy(GetVec(op.Rd), res);
 734          }
 735  
 736          public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
 737          {
 738              OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 739  
 740              Operand res = context.VectorZero();
 741  
 742              int sizeF = op.Size & 1;
 743  
 744              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
 745  
 746              int elems = op.GetBytesCount() >> sizeF + 2;
 747  
 748              for (int index = 0; index < elems; index++)
 749              {
 750                  Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 751                  Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
 752  
 753                  res = context.VectorInsert(res, emit(ne, me), index);
 754              }
 755  
 756              context.Copy(GetVec(op.Rd), res);
 757          }
 758  
 759          public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
 760          {
 761              OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
 762  
 763              Operand res = context.VectorZero();
 764  
 765              int sizeF = op.Size & 1;
 766  
 767              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
 768  
 769              int elems = op.GetBytesCount() >> sizeF + 2;
 770  
 771              for (int index = 0; index < elems; index++)
 772              {
 773                  Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
 774                  Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 775                  Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);
 776  
 777                  res = context.VectorInsert(res, emit(de, ne, me), index);
 778              }
 779  
 780              context.Copy(GetVec(op.Rd), res);
 781          }
 782  
 783          public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
 784          {
 785              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 786  
 787              Operand res = context.VectorZero();
 788  
 789              int elems = op.GetBytesCount() >> op.Size;
 790  
 791              for (int index = 0; index < elems; index++)
 792              {
 793                  Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
 794  
 795                  res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
 796              }
 797  
 798              context.Copy(GetVec(op.Rd), res);
 799          }
 800  
 801          public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
 802          {
 803              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 804  
 805              Operand res = context.VectorZero();
 806  
 807              int elems = op.GetBytesCount() >> op.Size;
 808  
 809              for (int index = 0; index < elems; index++)
 810              {
 811                  Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
 812                  Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
 813  
 814                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
 815              }
 816  
 817              context.Copy(GetVec(op.Rd), res);
 818          }
 819  
 820          public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
 821          {
 822              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 823  
 824              Operand res = context.VectorZero();
 825  
 826              int elems = op.GetBytesCount() >> op.Size;
 827  
 828              for (int index = 0; index < elems; index++)
 829              {
 830                  Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
 831                  Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
 832                  Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
 833  
 834                  res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
 835              }
 836  
 837              context.Copy(GetVec(op.Rd), res);
 838          }
 839  
 840          public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
 841          {
 842              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 843  
 844              Operand res = context.VectorZero();
 845  
 846              int elems = op.GetBytesCount() >> op.Size;
 847  
 848              for (int index = 0; index < elems; index++)
 849              {
 850                  Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 851  
 852                  res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
 853              }
 854  
 855              context.Copy(GetVec(op.Rd), res);
 856          }
 857  
 858          public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
 859          {
 860              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 861  
 862              Operand res = context.VectorZero();
 863  
 864              int elems = op.GetBytesCount() >> op.Size;
 865  
 866              for (int index = 0; index < elems; index++)
 867              {
 868                  Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 869                  Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
 870  
 871                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
 872              }
 873  
 874              context.Copy(GetVec(op.Rd), res);
 875          }
 876  
 877          public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
 878          {
 879              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 880  
 881              Operand res = context.VectorZero();
 882  
 883              int elems = op.GetBytesCount() >> op.Size;
 884  
 885              for (int index = 0; index < elems; index++)
 886              {
 887                  Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
 888                  Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 889                  Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);
 890  
 891                  res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
 892              }
 893  
 894              context.Copy(GetVec(op.Rd), res);
 895          }
 896  
 897          public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
 898          {
 899              OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
 900  
 901              Operand res = context.VectorZero();
 902  
 903              Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);
 904  
 905              int elems = op.GetBytesCount() >> op.Size;
 906  
 907              for (int index = 0; index < elems; index++)
 908              {
 909                  Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
 910  
 911                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
 912              }
 913  
 914              context.Copy(GetVec(op.Rd), res);
 915          }
 916  
 917          public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
 918          {
 919              OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
 920  
 921              Operand res = context.VectorZero();
 922  
 923              Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
 924  
 925              int elems = op.GetBytesCount() >> op.Size;
 926  
 927              for (int index = 0; index < elems; index++)
 928              {
 929                  Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 930  
 931                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
 932              }
 933  
 934              context.Copy(GetVec(op.Rd), res);
 935          }
 936  
 937          public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
 938          {
 939              OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
 940  
 941              Operand res = context.VectorZero();
 942  
 943              Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);
 944  
 945              int elems = op.GetBytesCount() >> op.Size;
 946  
 947              for (int index = 0; index < elems; index++)
 948              {
 949                  Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
 950                  Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
 951  
 952                  res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
 953              }
 954  
 955              context.Copy(GetVec(op.Rd), res);
 956          }
 957  
 958          public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
 959          {
 960              OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
 961  
 962              Operand imm = Const(op.Immediate);
 963  
 964              Operand res = context.VectorZero();
 965  
 966              int elems = op.GetBytesCount() >> op.Size;
 967  
 968              for (int index = 0; index < elems; index++)
 969              {
 970                  res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
 971              }
 972  
 973              context.Copy(GetVec(op.Rd), res);
 974          }
 975  
 976          public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
 977          {
 978              OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
 979  
 980              Operand imm = Const(op.Immediate);
 981  
 982              Operand res = context.VectorZero();
 983  
 984              int elems = op.GetBytesCount() >> op.Size;
 985  
 986              for (int index = 0; index < elems; index++)
 987              {
 988                  Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
 989  
 990                  res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
 991              }
 992  
 993              context.Copy(GetVec(op.Rd), res);
 994          }
 995  
 996          public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
 997          {
 998              EmitVectorWidenRmBinaryOp(context, emit, signed: true);
 999          }
1000  
1001          public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
1002          {
1003              EmitVectorWidenRmBinaryOp(context, emit, signed: false);
1004          }
1005  
1006          private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
1007          {
1008              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1009  
1010              Operand res = context.VectorZero();
1011  
1012              int elems = 8 >> op.Size;
1013  
1014              int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
1015  
1016              for (int index = 0; index < elems; index++)
1017              {
1018                  Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
1019                  Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
1020  
1021                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
1022              }
1023  
1024              context.Copy(GetVec(op.Rd), res);
1025          }
1026  
1027          public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
1028          {
1029              EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
1030          }
1031  
1032          public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
1033          {
1034              EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
1035          }
1036  
1037          private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
1038          {
1039              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1040  
1041              Operand res = context.VectorZero();
1042  
1043              int elems = 8 >> op.Size;
1044  
1045              int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
1046  
1047              for (int index = 0; index < elems; index++)
1048              {
1049                  Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
1050                  Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
1051  
1052                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
1053              }
1054  
1055              context.Copy(GetVec(op.Rd), res);
1056          }
1057  
1058          public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
1059          {
1060              EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
1061          }
1062  
1063          public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
1064          {
1065              EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
1066          }
1067  
1068          private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
1069          {
1070              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1071  
1072              Operand res = context.VectorZero();
1073  
1074              int elems = 8 >> op.Size;
1075  
1076              int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
1077  
1078              for (int index = 0; index < elems; index++)
1079              {
1080                  Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
1081                  Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
1082                  Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);
1083  
1084                  res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
1085              }
1086  
1087              context.Copy(GetVec(op.Rd), res);
1088          }
1089  
1090          public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
1091          {
1092              EmitVectorWidenBinaryOpByElem(context, emit, signed: true);
1093          }
1094  
1095          public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
1096          {
1097              EmitVectorWidenBinaryOpByElem(context, emit, signed: false);
1098          }
1099  
1100          private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed)
1101          {
1102              OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
1103  
1104              Operand res = context.VectorZero();
1105  
1106              Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
1107  
1108              int elems = 8 >> op.Size;
1109  
1110              int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
1111  
1112              for (int index = 0; index < elems; index++)
1113              {
1114                  Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
1115  
1116                  res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
1117              }
1118  
1119              context.Copy(GetVec(op.Rd), res);
1120          }
1121  
1122          public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit)
1123          {
1124              EmitVectorWidenTernaryOpByElem(context, emit, signed: true);
1125          }
1126  
1127          public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
1128          {
1129              EmitVectorWidenTernaryOpByElem(context, emit, signed: false);
1130          }
1131  
1132          private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed)
1133          {
1134              OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
1135  
1136              Operand res = context.VectorZero();
1137  
1138              Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed);
1139  
1140              int elems = 8 >> op.Size;
1141  
1142              int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
1143  
1144              for (int index = 0; index < elems; index++)
1145              {
1146                  Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed);
1147                  Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
1148  
1149                  res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
1150              }
1151  
1152              context.Copy(GetVec(op.Rd), res);
1153          }
1154  
1155          public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit)
1156          {
1157              EmitVectorPairwiseOp(context, emit, signed: true);
1158          }
1159  
1160          public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit)
1161          {
1162              EmitVectorPairwiseOp(context, emit, signed: false);
1163          }
1164  
1165          private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed)
1166          {
1167              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1168  
1169              Operand res = context.VectorZero();
1170  
1171              int pairs = op.GetPairsCount() >> op.Size;
1172  
1173              for (int index = 0; index < pairs; index++)
1174              {
1175                  int pairIndex = index << 1;
1176  
1177                  Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed);
1178                  Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed);
1179  
1180                  Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed);
1181                  Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed);
1182  
1183                  res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size);
1184                  res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size);
1185              }
1186  
1187              context.Copy(GetVec(op.Rd), res);
1188          }
1189  
1190          public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst)
1191          {
1192              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1193  
1194              Operand n = GetVec(op.Rn);
1195              Operand m = GetVec(op.Rm);
1196  
1197              if (op.RegisterSize == RegisterSize.Simd64)
1198              {
1199                  Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
1200                  Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]);
1201  
1202                  Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n
1203  
1204                  Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n
1205                  Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask);  // 0:odd  from m:n
1206  
1207                  context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
1208              }
1209              else if (op.Size < 3)
1210              {
1211                  Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);
1212  
1213                  Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n
1214                  Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m
1215  
1216                  Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM);
1217                  Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM);
1218  
1219                  context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right));
1220              }
1221              else
1222              {
1223                  Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
1224                  Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);
1225  
1226                  context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right));
1227              }
1228          }
1229  
1230          public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
1231          {
1232              EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false);
1233          }
1234  
1235          public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
1236          {
1237              EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false);
1238          }
1239  
1240          public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit)
1241          {
1242              EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true);
1243          }
1244  
1245          public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit)
1246          {
1247              EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true);
1248          }
1249  
1250          private static void EmitVectorAcrossVectorOp(
1251              ArmEmitterContext context,
1252              Func2I emit,
1253              bool signed,
1254              bool isLong)
1255          {
1256              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1257  
1258              int elems = op.GetBytesCount() >> op.Size;
1259  
1260              Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
1261  
1262              for (int index = 1; index < elems; index++)
1263              {
1264                  Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed);
1265  
1266                  res = emit(res, n);
1267              }
1268  
1269              int size = isLong ? op.Size + 1 : op.Size;
1270  
1271              Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size);
1272  
1273              context.Copy(GetVec(op.Rd), d);
1274          }
1275  
1276          public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
1277          {
1278              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1279  
1280              Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
1281  
1282              Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
1283  
1284              for (int index = 1; index < 4; index++)
1285              {
1286                  Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index);
1287  
1288                  res = emit(res, n);
1289              }
1290  
1291              Operand d = context.VectorInsert(context.VectorZero(), res, 0);
1292  
1293              context.Copy(GetVec(op.Rd), d);
1294          }
1295  
1296          public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
1297          {
1298              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1299  
1300              Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
1301  
1302              const int SM0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
1303              const int SM1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
1304              const int SM2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
1305              const int SM3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
1306  
1307              Operand nCopy = context.Copy(GetVec(op.Rn));
1308  
1309              Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM0));
1310              Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM1));
1311              Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM2));
1312              Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM3));
1313  
1314              Operand res = emit(emit(part0, part1), emit(part2, part3));
1315  
1316              context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
1317          }
1318  
1319          public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
1320          {
1321              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1322  
1323              OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;
1324  
1325              Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
1326              Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
1327  
1328              Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0);
1329  
1330              context.Copy(GetVec(op.Rd), res);
1331          }
1332  
1333          public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
1334          {
1335              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1336  
1337              Operand n = GetVec(op.Rn);
1338  
1339              Operand op0, op1;
1340  
1341              if ((op.Size & 1) == 0)
1342              {
1343                  const int SM0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0;
1344                  const int SM1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0;
1345  
1346                  Operand zeroN = context.VectorZeroUpper64(n);
1347  
1348                  op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM0));
1349                  op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM1));
1350              }
1351              else /* if ((op.Size & 1) == 1) */
1352              {
1353                  Operand zero = context.VectorZero();
1354  
1355                  op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero);
1356                  op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n);
1357              }
1358  
1359              context.Copy(GetVec(op.Rd), emit(op0, op1));
1360          }
1361  
1362          public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
1363          {
1364              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1365  
1366              Operand res = context.VectorZero();
1367  
1368              int sizeF = op.Size & 1;
1369  
1370              OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
1371  
1372              int pairs = op.GetPairsCount() >> sizeF + 2;
1373  
1374              for (int index = 0; index < pairs; index++)
1375              {
1376                  int pairIndex = index << 1;
1377  
1378                  Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex);
1379                  Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1);
1380  
1381                  Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex);
1382                  Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1);
1383  
1384                  res = context.VectorInsert(res, emit(n0, n1), index);
1385                  res = context.VectorInsert(res, emit(m0, m1), pairs + index);
1386              }
1387  
1388              context.Copy(GetVec(op.Rd), res);
1389          }
1390  
1391          public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
1392          {
1393              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
1394  
1395              Operand nCopy = context.Copy(GetVec(op.Rn));
1396              Operand mCopy = context.Copy(GetVec(op.Rm));
1397  
1398              int sizeF = op.Size & 1;
1399  
1400              if (sizeF == 0)
1401              {
1402                  if (op.RegisterSize == RegisterSize.Simd64)
1403                  {
1404                      Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
1405  
1406                      Operand zero = context.VectorZero();
1407  
1408                      Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
1409                      Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
1410  
1411                      context.Copy(GetVec(op.Rd), emit(part0, part1));
1412                  }
1413                  else /* if (op.RegisterSize == RegisterSize.Simd128) */
1414                  {
1415                      const int SM0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
1416                      const int SM1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
1417  
1418                      Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM0));
1419                      Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM1));
1420  
1421                      context.Copy(GetVec(op.Rd), emit(part0, part1));
1422                  }
1423              }
1424              else /* if (sizeF == 1) */
1425              {
1426                  Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
1427                  Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
1428  
1429                  context.Copy(GetVec(op.Rd), emit(part0, part1));
1430              }
1431          }
1432  
1433          public enum CmpCondition
1434          {
1435              // Legacy Sse.
1436              Equal = 0, // Ordered, non-signaling.
1437              LessThan = 1, // Ordered, signaling.
1438              LessThanOrEqual = 2, // Ordered, signaling.
1439              UnorderedQ = 3, // Non-signaling.
1440              NotLessThan = 5, // Unordered, signaling.
1441              NotLessThanOrEqual = 6, // Unordered, signaling.
1442              OrderedQ = 7, // Non-signaling.
1443  
1444              // Vex.
1445              GreaterThanOrEqual = 13, // Ordered, signaling.
1446              GreaterThan = 14, // Ordered, signaling.
1447              OrderedS = 23, // Signaling.
1448          }
1449  
1450          [Flags]
1451          public enum SaturatingFlags
1452          {
1453              None = 0,
1454  
1455              ByElem = 1 << 0,
1456              Scalar = 1 << 1,
1457              Signed = 1 << 2,
1458  
1459              Add = 1 << 3,
1460              Sub = 1 << 4,
1461  
1462              Accumulate = 1 << 5,
1463          }
1464  
1465          public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
1466          {
1467              EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed);
1468          }
1469  
1470          public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit)
1471          {
1472              EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed);
1473          }
1474  
// Applies `emit` to each sign-extended element of Rn, saturates the outcome to the signed element
// range and stores the resulting vector in Rd. The result starts out as a zero vector, so lanes
// that are not processed stay zero. Only SaturatingFlags.Scalar is examined here.
public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    bool isScalar = (flags & SaturatingFlags.Scalar) == SaturatingFlags.Scalar;

    int count = isScalar ? 1 : op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < count; lane++)
    {
        Operand source = EmitVectorExtractSx(context, op.Rn, lane, op.Size);
        Operand computed = emit(source);

        // Elements up to 32 bits are range-checked against the destination type; 64-bit elements
        // go through EmitUnarySignedSatQAbsOrNeg, which maps long.MinValue to long.MaxValue.
        Operand saturated = op.Size > 2
            ? EmitUnarySignedSatQAbsOrNeg(context, computed)
            : EmitSignedSrcSatQ(context, computed, op.Size, signedDst: true);

        result = EmitVectorInsert(context, result, saturated, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
1504  
// Scalar, signed form of EmitSaturatingBinaryOp; any extra `flags` are OR-ed into the request.
public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
    => EmitSaturatingBinaryOp(context, emit, flags | SaturatingFlags.Signed | SaturatingFlags.Scalar);
1509  
// Scalar, unsigned form of EmitSaturatingBinaryOp. No callback is supplied, so `flags` is expected
// to select Add, Sub or Accumulate (the callback path would invoke the null `emit`).
public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, emit: null, flags: flags | SaturatingFlags.Scalar);
1514  
// Vector, signed form of EmitSaturatingBinaryOp; any extra `flags` are OR-ed into the request.
public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None)
    => EmitSaturatingBinaryOp(context, emit, flags | SaturatingFlags.Signed);
1519  
// Vector, unsigned form of EmitSaturatingBinaryOp. No callback is supplied, so `flags` is expected
// to select Add, Sub or Accumulate (the callback path would invoke the null `emit`).
public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, emit: null, flags: flags);
1524  
// Vector, signed, by-element form of EmitSaturatingBinaryOp: `emit` combines every Rn element
// with one fixed element of Rm.
public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
    => EmitSaturatingBinaryOp(context, emit, flags: SaturatingFlags.Signed | SaturatingFlags.ByElem);
1529  
// Emits a saturating binary operation over the elements selected by the current SIMD opcode and
// stores the result vector in Rd. The mode is chosen from `flags`:
//   - Add / Sub: Rn and Rm elements are added or subtracted with saturation (Add wins if both are set);
//   - Accumulate: each Rn element, extracted with the opposite signedness, is added to the Rd element;
//   - otherwise: `emit` combines Rn with Rm (or with one fixed Rm element when ByElem is set) and
//     its result is saturated by EmitSignedSrcSatQ.
// `emit` is only invoked in the last mode, so callers of the first two may pass null.
public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    bool isByElem = (flags & SaturatingFlags.ByElem) == SaturatingFlags.ByElem;
    bool isScalar = (flags & SaturatingFlags.Scalar) == SaturatingFlags.Scalar;
    bool isSigned = (flags & SaturatingFlags.Signed) == SaturatingFlags.Signed;

    bool isAdd = (flags & SaturatingFlags.Add) == SaturatingFlags.Add;
    bool isSub = (flags & SaturatingFlags.Sub) == SaturatingFlags.Sub;

    bool isAccumulate = (flags & SaturatingFlags.Accumulate) == SaturatingFlags.Accumulate;

    bool isArithmetic = isAdd || isSub;
    bool usesCallback = !isArithmetic && !isAccumulate;

    int count = isScalar ? 1 : op.GetBytesCount() >> op.Size;

    // In by-element callback mode the second operand is a single Rm element, extracted once up front.
    Operand fixedElem = default;

    if (usesCallback && isByElem)
    {
        OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op;

        fixedElem = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, isSigned);
    }

    for (int lane = 0; lane < count; lane++)
    {
        Operand saturated;

        if (isArithmetic)
        {
            Operand lhs = EmitVectorExtract(context, op.Rn, lane, op.Size, isSigned);
            Operand rhs = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, lane, op.Size, isSigned);

            if (op.Size > 2)
            {
                // 64-bit elements: the operation cannot be widened, so dedicated overflow checks are used.
                if (isAdd)
                {
                    saturated = isSigned
                        ? EmitBinarySignedSatQAdd(context, lhs, rhs)
                        : EmitBinaryUnsignedSatQAdd(context, lhs, rhs);
                }
                else
                {
                    saturated = isSigned
                        ? EmitBinarySignedSatQSub(context, lhs, rhs)
                        : EmitBinaryUnsignedSatQSub(context, lhs, rhs);
                }
            }
            else
            {
                // Narrower elements were extended to 64 bits, so compute there and clamp afterwards.
                Operand wide = isAdd ? context.Add(lhs, rhs) : context.Subtract(lhs, rhs);

                saturated = EmitSignedSrcSatQ(context, wide, op.Size, signedDst: isSigned);
            }
        }
        else if (isAccumulate)
        {
            Operand addend = EmitVectorExtract(context, op.Rn, lane, op.Size, !isSigned);
            Operand accumulator = EmitVectorExtract(context, op.Rd, lane, op.Size, isSigned);

            if (op.Size > 2)
            {
                saturated = isSigned
                    ? EmitBinarySignedSatQAcc(context, addend, accumulator)
                    : EmitBinaryUnsignedSatQAcc(context, addend, accumulator);
            }
            else
            {
                Operand wide = context.Add(addend, accumulator);

                saturated = EmitSignedSrcSatQ(context, wide, op.Size, signedDst: isSigned);
            }
        }
        else
        {
            Operand lhs = EmitVectorExtract(context, op.Rn, lane, op.Size, isSigned);
            Operand rhs = isByElem
                ? fixedElem
                : EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, lane, op.Size, isSigned);

            saturated = EmitSignedSrcSatQ(context, emit(lhs, rhs), op.Size, signedDst: isSigned);
        }

        result = EmitVectorInsert(context, result, saturated, lane, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
1626  
// Option bits for EmitSaturatingNarrowOp: operation shape plus source/destination signedness.
[Flags]
public enum SaturatingNarrowFlags
{
    Scalar = 0x1,    // Only element 0 is narrowed.
    SignedSrc = 0x2, // Source elements are extracted and range-checked as signed values.
    SignedDst = 0x4, // Results are clamped to the signed range of the destination element.

    // Presets named <shape><source><destination>, where Sx means signed and Zx means unsigned.
    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0x0,
}
1642  
// Narrows each source element of Rn (element size op.Size + 1) to a saturated element of size
// op.Size and writes the results to Rd. For vector forms on a 128-bit register the results go to
// the upper lanes and the existing contents of Rd are kept; otherwise they go to the lower lanes
// of a zeroed vector.
public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    bool isScalar = (flags & SaturatingNarrowFlags.Scalar) == SaturatingNarrowFlags.Scalar;
    bool srcIsSigned = (flags & SaturatingNarrowFlags.SignedSrc) == SaturatingNarrowFlags.SignedSrc;
    bool dstIsSigned = (flags & SaturatingNarrowFlags.SignedDst) == SaturatingNarrowFlags.SignedDst;

    int count = isScalar ? 1 : 8 >> op.Size;

    // Index of the first destination lane: `count` selects the upper lanes, 0 the lower ones.
    int firstLane = (!isScalar && op.RegisterSize == RegisterSize.Simd128) ? count : 0;

    Operand dest = GetVec(op.Rd);

    Operand result;

    if (firstLane != 0)
    {
        result = context.Copy(dest);
    }
    else
    {
        result = context.VectorZero();
    }

    for (int i = 0; i < count; i++)
    {
        Operand wide = EmitVectorExtract(context, op.Rn, i, op.Size + 1, srcIsSigned);

        Operand narrow;

        if (srcIsSigned)
        {
            narrow = EmitSignedSrcSatQ(context, wide, op.Size, dstIsSigned);
        }
        else
        {
            narrow = EmitUnsignedSrcSatQ(context, wide, op.Size, dstIsSigned);
        }

        result = EmitVectorInsert(context, result, narrow, firstLane + i, op.Size);
    }

    context.Copy(dest, result);
}
1672  
// long SignedSignSatQ(long op, int size);
// Emits code yielding 0 when op is zero. Otherwise the QC flag is set and the result is the
// largest (op > 0) or smallest (op < 0) signed value of an (8 << size)-bit element.
public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand zero = Const(0L);
    Operand upperBound = Const((1L << (eSize - 1)) - 1L);
    Operand lowerBound = Const(-(1L << (eSize - 1)));

    Operand notPositive = Label();
    Operand done = Label();

    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), zero);

    // Stores the saturated value, records the saturation in QC and jumps to the end.
    void Saturate(Operand bound)
    {
        context.Copy(result, bound);
        SetFpFlag(context, FPState.QcFlag, Const(1));
        context.Branch(done);
    }

    context.BranchIf(notPositive, op, zero, Comparison.LessOrEqual);
    Saturate(upperBound);

    context.MarkLabel(notPositive);
    context.BranchIf(done, op, zero, Comparison.GreaterOrEqual);
    Saturate(lowerBound);

    context.MarkLabel(done);

    return result;
}
1705  
// private static ulong UnsignedSignSatQ(ulong op, int size);
// Emits code yielding 0 when op is zero. Otherwise the QC flag is set and the result is the
// largest unsigned value of an (8 << size)-bit element.
public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand zero = Const(0UL);
    Operand upperBound = Const(ulong.MaxValue >> (64 - eSize));

    Operand done = Label();

    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), zero);

    // An unsigned value that is <= 0 can only be 0, which needs no saturation.
    context.BranchIf(done, op, zero, Comparison.LessOrEqualUI);

    context.Copy(result, upperBound);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1730  
// TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned).
// long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size);
// Emits code that clamps the signed 64-bit value `op` to the range of an (8 << sizeDst)-bit
// destination element (signed or unsigned per `signedDst`), setting the QC flag when it clamps.
public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
{
    int eSizeDst = 8 << sizeDst;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);

    Operand upperBound;
    Operand lowerBound;

    if (signedDst)
    {
        upperBound = Const((1L << (eSizeDst - 1)) - 1L);
        lowerBound = Const(-(1L << (eSizeDst - 1)));
    }
    else
    {
        upperBound = Const((1UL << eSizeDst) - 1UL);
        lowerBound = Const(0UL);
    }

    Operand checkLower = Label();
    Operand done = Label();

    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), op);

    // Replaces the result with the given bound, records the saturation in QC and jumps to the end.
    void Saturate(Operand bound)
    {
        context.Copy(result, bound);
        SetFpFlag(context, FPState.QcFlag, Const(1));
        context.Branch(done);
    }

    // Both range checks are signed comparisons on the 64-bit source value.
    context.BranchIf(checkLower, op, upperBound, Comparison.LessOrEqual);
    Saturate(upperBound);

    context.MarkLabel(checkLower);
    context.BranchIf(done, op, lowerBound, Comparison.GreaterOrEqual);
    Saturate(lowerBound);

    context.MarkLabel(done);

    return result;
}
1763  
// TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
// long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
// Emits code that clamps the unsigned 64-bit value `op` to the maximum of an (8 << sizeDst)-bit
// destination element (signed or unsigned per `signedDst`), setting the QC flag when it clamps.
public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
{
    int eSizeDst = 8 << sizeDst;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32);

    Operand upperBound;

    if (signedDst)
    {
        upperBound = Const((1L << (eSizeDst - 1)) - 1L);
    }
    else
    {
        upperBound = Const((1UL << eSizeDst) - 1UL);
    }

    Operand done = Label();

    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), op);

    // An unsigned source has no lower bound to violate, so only the upper bound is checked.
    context.BranchIf(done, op, upperBound, Comparison.LessOrEqualUI);

    context.Copy(result, upperBound);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1788  
// long UnarySignedSatQAbsOrNeg(long op);
// Emits code that passes `op` through unchanged unless it equals long.MinValue, in which case
// the result becomes long.MaxValue and the QC flag is set.
private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
{
    Debug.Assert(op.Type == OperandType.I64);

    Operand smallest = Const(long.MinValue);
    Operand largest = Const(long.MaxValue);

    Operand done = Label();

    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), op);

    context.BranchIf(done, op, smallest, Comparison.NotEqual);

    context.Copy(result, largest);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1810  
// long BinarySignedSatQAdd(long op1, long op2);
// Emits a signed 64-bit add that saturates on overflow: the result becomes long.MaxValue when
// op1 is non-negative and long.MinValue otherwise, and the QC flag is set.
public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand zero = Const(0L);
    Operand smallest = Const(long.MinValue);
    Operand largest = Const(long.MaxValue);

    Operand done = Label();

    Operand sum = context.Add(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

    // Overflow shows in the sign bit of ~(op1 ^ op2) & (op1 ^ sum): the operands have the same
    // sign and the sum's sign differs from it. A non-negative value means no overflow.
    Operand sameSign = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
    Operand signFlipped = context.BitwiseExclusiveOr(op1, sum);
    Operand overflow = context.BitwiseAnd(sameSign, signFlipped);
    context.BranchIf(done, overflow, zero, Comparison.GreaterOrEqual);

    Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
    Operand bound = context.ConditionalSelect(op1NonNegative, largest, smallest);
    context.Copy(result, bound);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1838  
// ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
// Emits an unsigned 64-bit add that saturates to ulong.MaxValue and sets the QC flag when the
// sum wraps around.
public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand largest = Const(ulong.MaxValue);

    Operand done = Label();

    Operand sum = context.Add(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

    // The sum is kept as long as it is not below op1 (unsigned); otherwise it wrapped.
    context.BranchIf(done, sum, op1, Comparison.GreaterOrEqualUI);

    context.Copy(result, largest);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1860  
// long BinarySignedSatQSub(long op1, long op2);
// Emits a signed 64-bit subtract that saturates on overflow: the result becomes long.MaxValue
// when op1 is non-negative and long.MinValue otherwise, and the QC flag is set.
public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand zero = Const(0L);
    Operand smallest = Const(long.MinValue);
    Operand largest = Const(long.MaxValue);

    Operand done = Label();

    Operand difference = context.Subtract(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), difference);

    // Overflow shows in the sign bit of (op1 ^ op2) & (op1 ^ difference): the operands have
    // different signs and the difference's sign differs from op1's. Non-negative means no overflow.
    Operand signsDiffer = context.BitwiseExclusiveOr(op1, op2);
    Operand signFlipped = context.BitwiseExclusiveOr(op1, difference);
    Operand overflow = context.BitwiseAnd(signsDiffer, signFlipped);
    context.BranchIf(done, overflow, zero, Comparison.GreaterOrEqual);

    Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
    Operand bound = context.ConditionalSelect(op1NonNegative, largest, smallest);
    context.Copy(result, bound);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1888  
// ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
// Emits an unsigned 64-bit subtract that saturates to 0 and sets the QC flag when op1 is
// smaller than op2.
public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand zero = Const(0L);

    Operand done = Label();

    Operand difference = context.Subtract(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), difference);

    // The difference is kept as long as op1 is not below op2 (unsigned).
    context.BranchIf(done, op1, op2, Comparison.GreaterOrEqualUI);

    context.Copy(result, zero);
    SetFpFlag(context, FPState.QcFlag, Const(1));
    context.Branch(done);

    context.MarkLabel(done);

    return result;
}
1910  
// long BinarySignedSatQAcc(ulong op1, long op2);
// Emits op1 + op2, where op1 is treated as unsigned and op2 as signed, saturating the result to
// long.MaxValue and setting the QC flag whenever one of the checks below detects an overflow.
private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand zero = Const(0L);
    Operand largest = Const(long.MaxValue);

    Operand op1IsLarge = Label();
    Operand op2IsNegative = Label();
    Operand done = Label();

    Operand sum = context.Add(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

    // Every overflow path clamps to long.MaxValue, records it in QC and jumps to the end.
    void Saturate()
    {
        context.Copy(result, largest);
        SetFpFlag(context, FPState.QcFlag, Const(1));
        context.Branch(done);
    }

    // op1 <= long.MaxValue: overflow only if op2 is non-negative and the sum came out negative,
    // i.e. the sign bit of (~op2 & sum) is set.
    context.BranchIf(op1IsLarge, op1, largest, Comparison.GreaterUI);
    Operand notOp2AndSum = context.BitwiseAnd(context.BitwiseNot(op2), sum);
    context.BranchIf(done, notOp2AndSum, zero, Comparison.GreaterOrEqual);
    Saturate();

    // op1 > long.MaxValue and op2 >= 0: always saturates.
    context.MarkLabel(op1IsLarge);
    context.BranchIf(op2IsNegative, op2, zero, Comparison.Less);
    Saturate();

    // op1 > long.MaxValue and op2 < 0: the sum is kept only if it is <= long.MaxValue (unsigned).
    context.MarkLabel(op2IsNegative);
    context.BranchIf(done, sum, largest, Comparison.LessOrEqualUI);
    Saturate();

    context.MarkLabel(done);

    return result;
}
1949  
// ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
// Emits op1 + op2, where op1 is treated as signed and op2 as unsigned, saturating the result to
// ulong.MaxValue (overflow) or 0 (underflow) and setting the QC flag when it saturates.
private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
{
    Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

    Operand zero = Const(0L);
    Operand largestSigned = Const(long.MaxValue);
    Operand largestUnsigned = Const(ulong.MaxValue);

    Operand op1IsNegative = Label();
    Operand done = Label();

    Operand sum = context.Add(op1, op2);
    Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

    // Replaces the result with the given bound, records the saturation in QC and jumps to the end.
    void Saturate(Operand bound)
    {
        context.Copy(result, bound);
        SetFpFlag(context, FPState.QcFlag, Const(1));
        context.Branch(done);
    }

    // op1 >= 0: the sum is kept unless it wrapped around, i.e. ended up below op1 (unsigned).
    context.BranchIf(op1IsNegative, op1, zero, Comparison.Less);
    context.BranchIf(done, sum, op1, Comparison.GreaterOrEqualUI);
    Saturate(largestUnsigned);

    // op1 < 0: the sum is kept when op2 > long.MaxValue (unsigned) or when the sum is
    // non-negative as a signed value; otherwise the result is clamped to 0.
    context.MarkLabel(op1IsNegative);
    context.BranchIf(done, op2, largestSigned, Comparison.GreaterUI);
    context.BranchIf(done, sum, zero, Comparison.GreaterOrEqual);
    Saturate(zero);

    context.MarkLabel(done);

    return result;
}
1982  
// Emits an x86 floating-point absolute value of `value`. The mask is built from -0, whose only
// set bit is the sign bit, and combined with ANDNPS (single) or ANDNPD (double), which computes
// (NOT mask) AND value. `vector` selects a mask for all elements instead of a scalar mask.
public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
{
    if (single)
    {
        Operand signMask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);

        return context.AddIntrinsic(Intrinsic.X86Andnps, signMask, value);
    }
    else
    {
        Operand signMask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);

        return context.AddIntrinsic(Intrinsic.X86Andnpd, signMask, value);
    }
}
1997  
// Extracts one element from vector register `reg`, sign-extended to 64 bits.
public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: true);
2002  
// Extracts one element from vector register `reg`, zero-extended to 64 bits.
public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: false);
2007  
// Extracts element `index` of size (8 << size) bits from vector register `reg` and extends it to
// a 64-bit operand, sign- or zero-extending according to `signed`.
// Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid size or index.
public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
{
    ThrowIfInvalid(index, size);

    Operand source = GetVec(reg);
    Operand raw;

    switch (size)
    {
        case 0:
            raw = context.VectorExtract8(source, index);

            return signed
                ? context.SignExtend8(OperandType.I64, raw)
                : context.ZeroExtend8(OperandType.I64, raw);

        case 1:
            raw = context.VectorExtract16(source, index);

            return signed
                ? context.SignExtend16(OperandType.I64, raw)
                : context.ZeroExtend16(OperandType.I64, raw);

        case 2:
            raw = context.VectorExtract(OperandType.I32, source, index);

            return signed
                ? context.SignExtend32(OperandType.I64, raw)
                : context.ZeroExtend32(OperandType.I64, raw);

        default:
            // size == 3 (guaranteed by ThrowIfInvalid): the element is already 64 bits wide.
            return context.VectorExtract(OperandType.I64, source, index);
    }
}
2066  
// Returns `vector` with element `index` of size (8 << size) bits replaced by `value`.
// A 64-bit `value` is first converted to 32 bits when the element is narrower than 64 bits.
// Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for an invalid size or index.
public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
{
    ThrowIfInvalid(index, size);

    Operand element = value;

    if (size < 3 && element.Type == OperandType.I64)
    {
        element = context.ConvertI64ToI32(element);
    }

    switch (size)
    {
        case 0:
            return context.VectorInsert8(vector, element, index);

        case 1:
            return context.VectorInsert16(vector, element, index);

        default:
            // size is 2 or 3 here (guaranteed by ThrowIfInvalid); both use the generic insert.
            return context.VectorInsert(vector, element, index);
    }
}
2094  
// Validates an element size (0..3, i.e. 8 to 64 bits) and an element index within a 16-byte
// vector. Throws ArgumentOutOfRangeException naming the offending parameter; size is checked first.
public static void ThrowIfInvalid(int index, int size)
{
    if (size < 0 || size > 3)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    // A 16-byte vector holds 16, 8, 4 or 2 elements for sizes 0 to 3.
    int elementCount = 16 >> size;

    if (index < 0 || index >= elementCount)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }
}
2107      }
2108  }