// src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitFlowHelper;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    static partial class InstEmit32
    {
        public static void Vabd_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

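            // These helpers take a 'signed' flag as their last argument; the architectural U bit selects the unsigned variant, hence !op.U.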
            EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
        }

        public static void Vabdl_I(ArmEmitterContext context)
        {
            OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;

            EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U);
        }

        public static void Vabs_S(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    return EmitFloatAbs(context, m, (op.Size & 1) == 0, false);
                });
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
            }
        }

        public static void Vabs_V(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
                {
                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
                }
                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (m) =>
                    {
                        return EmitFloatAbs(context, m, (op.Size & 1) == 0, true);
                    });
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1));
                }
            }
            else
            {
                EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1));
            }
        }

        private static Operand EmitAbs(ArmEmitterContext context, Operand value)
        {
            Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0));

            return context.ConditionalSelect(isPositive, value, context.Negate(value));
        }

        public static void Vadd_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2));
            }
        }

        public static void Vadd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
            }
        }

        public static void Vadd_I(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PaddInstruction[op.Size], op1, op2));
            }
            else
            {
                EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2));
            }
        }

        public static void Vaddl_I(ArmEmitterContext context)
        {
            OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;

            EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
        }

        public static void Vaddw_I(ArmEmitterContext context)
        {
            OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;

            EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
        }

        public static void Vcnt(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

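            // VCNT is only defined for byte elements, so Size is 0 here and GetBytesCount() is the element count.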
 156              Operand res = GetVecA32(op.Qd);
 157  
 158              int elems = op.GetBytesCount();
 159  
 160              for (int index = 0; index < elems; index++)
 161              {
 162                  Operand de;
 163                  Operand me = EmitVectorExtractZx32(context, op.Qm, op.Im + index, op.Size);
 164  
 165                  if (Optimizations.UsePopCnt)
 166                  {
 167                      de = context.AddIntrinsicInt(Intrinsic.X86Popcnt, me);
 168                  }
 169                  else
 170                  {
 171                      de = EmitCountSetBits8(context, me);
 172                  }
 173  
 174                  res = EmitVectorInsert(context, res, de, op.Id + index, op.Size);
 175              }
 176  
 177              context.Copy(GetVecA32(op.Qd), res);
 178          }
 179  
 180          public static void Vdup(ArmEmitterContext context)
 181          {
 182              OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp;
 183  
 184              Operand insert = GetIntA32(context, op.Rt);
 185  
 186              // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
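            // e.g. with Size == 0: 0x000000AB * 0x0101010101010101 == 0xABABABABABABABAB, filling all eight byte lanes in one multiply.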
            insert = op.Size switch
            {
                2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
                1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
                0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
                _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\"."),
            };

            InsertScalar(context, op.Vd, insert);
            if (op.Q)
            {
                InsertScalar(context, op.Vd + 1, insert);
            }
        }

        public static void Vdup_1(ArmEmitterContext context)
        {
            OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp;

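            // (Vm & 1) picks the low or high doubleword of the Q register; shifting it left by (3 - Size) converts that into an element offset at the current element size.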
            Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);

            // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
            insert = op.Size switch
            {
                2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
                1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
                0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
                _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\"."),
            };

            InsertScalar(context, op.Vd, insert);
            if (op.Q)
            {
                InsertScalar(context, op.Vd | 1, insert);
            }
        }

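        // Builds a 16-byte PSHUFB control mask, returned as (high, low) longs: output bytes [start, start + length)
        // select consecutive source bytes beginning at startByte; every other lane is 0x80, which PSHUFB zeroes.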
        private static (long, long) MaskHelperByteSequence(int start, int length, int startByte)
        {
            int end = start + length;
            int b = startByte;
            long result = 0;
            long result2 = 0;
            for (int i = 0; i < 8; i++)
            {
                result |= (long)((i >= end || i < start) ? 0x80 : b++) << (i * 8);
            }
            for (int i = 8; i < 16; i++)
            {
                result2 |= (long)((i >= end || i < start) ? 0x80 : b++) << ((i - 8) * 8);
            }
            return (result2, result);
        }

        public static void Vext(ArmEmitterContext context)
        {
            OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp;
            int elems = op.GetBytesCount();
            int byteOff = op.Immediate;

            if (Optimizations.UseSsse3)
            {
                EmitVectorBinaryOpSimd32(context, (n, m) =>
                {
                    // Writing low to high of d: start <imm> into n, overlap into m.
                    // Then rotate n down by <imm>, m up by (elems)-imm.
                    // Then OR them together for the result.
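                    // e.g. a doubleword VEXT with <imm> = 3 yields d = { n[3..7], m[0..2] }.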

                    (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff);
                    (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0);
                    Operand nMask, mMask;
                    if (!op.Q)
                    {
                        // Do the same operation to the bytes in the top doubleword too, as our target could be in either.
                        nMaskHigh = nMaskLow + 0x0808080808080808L;
                        mMaskHigh = mMaskLow + 0x0808080808080808L;
                    }
                    nMask = X86GetElements(context, nMaskHigh, nMaskLow);
                    mMask = X86GetElements(context, mMaskHigh, mMaskLow);
                    Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask);
                    Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask);

                    return context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart);
                });
            }
            else
            {
                Operand res = GetVecA32(op.Qd);

                for (int index = 0; index < elems; index++)
                {
                    Operand extract;

                    if (byteOff >= elems)
                    {
                        extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
                    }
                    else
                    {
                        extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
                    }
                    byteOff++;

                    res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
                }

                context.Copy(GetVecA32(op.Qd), res);
            }
        }

        public static void Vfma_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
                });
            }
        }

        public static void Vfma_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
            }
            else
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
                });
            }
        }

        public static void Vfms_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
                });
            }
        }

        public static void Vfms_V(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
            }
            else
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
                });
            }
        }

        public static void Vfnma_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3);
                });
            }
        }

        public static void Vfnms_S(ArmEmitterContext context) // Fused.
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
            }
            else if (Optimizations.FastFP && Optimizations.UseFma)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3);
                });
            }
        }

        public static void Vhadd(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (op.U)
            {
                EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftRightUI(context.Add(op1, op2), Const(1)));
            }
            else
            {
                EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftRightSI(context.Add(op1, op2), Const(1)));
            }
        }

        public static void Vmov_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarUnaryOpF32(context, 0, 0);
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => op1);
            }
        }

        public static void Vmovn(ArmEmitterContext context)
        {
            EmitVectorUnaryNarrowOp32(context, (op1) => op1);
        }

        public static void Vneg_S(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
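                    // Negation just flips the sign bit: XOR with a -0.0 mask of the matching width.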
                    if ((op.Size & 1) == 0)
                    {
                        Operand mask = X86GetScalar(context, -0f);
                        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
                    }
                    else
                    {
                        Operand mask = X86GetScalar(context, -0d);
                        return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
                    }
                });
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
            }
        }

        public static void Vnmul_S(ArmEmitterContext context)
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitScalarBinaryOpSimd32(context, (n, m) =>
                {
                    if ((op.Size & 1) == 0)
                    {
                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
                        Operand mask = X86GetScalar(context, -0f);
                        return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res);
                    }
                    else
                    {
                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
                        Operand mask = X86GetScalar(context, -0d);
                        return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res);
                    }
                });
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
            }
        }

        public static void Vnmla_S(ArmEmitterContext context)
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return context.Subtract(context.Negate(op1), context.Multiply(op2, op3));
                });
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res);
                });
            }
        }

        public static void Vnmls_S(ArmEmitterContext context)
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return context.Add(context.Negate(op1), context.Multiply(op2, op3));
                });
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res);
                });
            }
        }

        public static void Vneg_V(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
                {
                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
                }
                else if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (m) =>
                    {
                        if ((op.Size & 1) == 0)
                        {
                            Operand mask = X86GetAllElements(context, -0f);
                            return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
                        }
                        else
                        {
                            Operand mask = X86GetAllElements(context, -0d);
                            return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
                        }
                    });
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
                }
            }
            else
            {
                EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
            }
        }

        public static void Vdiv_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2);
                });
            }
        }

        public static void Vmaxnm_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, true, true);
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2));
            }
        }

        public static void Vmaxnm_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, true, false);
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxNumFpscr), op1, op2));
            }
        }

        public static void Vminnm_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, false, true);
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2));
            }
        }

        public static void Vminnm_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse41)
            {
                EmitSse41MaxMinNumOpF32(context, false, false);
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinNumFpscr), op1, op2));
            }
        }

        public static void Vmax_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2);
                });
            }
        }

        public static void Vmax_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (op.U)
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxuInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2));
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxsInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2));
                }
            }
        }

        public static void Vmin_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2);
                });
            }
        }

        public static void Vmin_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (op.U)
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2));
                }
            }
        }

        public static void Vmla_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return context.Add(op1, context.Multiply(op2, op3));
                });
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
                });
            }
        }

        public static void Vmla_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
            }
            else
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3);
                });
            }
        }

        public static void Vmla_I(ArmEmitterContext context)
        {
            EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
        }

        public static void Vmla_1(ArmEmitterContext context)
        {
            OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
                }
                else if (Optimizations.FastFP)
                {
                    EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
                }
                else
                {
                    EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3));
                }
            }
            else
            {
                EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
            }
        }

        public static void Vmlal_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U);
        }

        public static void Vmls_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return context.Subtract(op1, context.Multiply(op2, op3));
                });
            }
            else
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
                });
            }
        }

        public static void Vmls_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
            }
            else
            {
                EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3);
                });
            }
        }

        public static void Vmls_I(ArmEmitterContext context)
        {
            EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
        }

        public static void Vmls_1(ArmEmitterContext context)
        {
            OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
                }
                else if (Optimizations.FastFP)
                {
                    EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
                }
                else
                {
                    EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3));
                }
            }
            else
            {
                EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
            }
        }

        public static void Vmlsl_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U);
        }

        public static void Vmul_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
            }
            else if (Optimizations.FastFP)
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2);
                });
            }
        }

        public static void Vmul_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
            }
            else if (Optimizations.FastFP)
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2);
                });
            }
        }

        public static void Vmul_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (op.U) // This instruction is always signed, U indicates polynomial mode.
            {
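                // Polynomial (P8) multiply is carry-less: partial products are combined with XOR instead of addition.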
                EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size));
            }
            else
            {
                EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
            }
        }

        public static void Vmul_1(ArmEmitterContext context)
        {
            OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
                }
                else if (Optimizations.FastFP)
                {
                    EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
                }
                else
                {
                    EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2));
                }
            }
            else
            {
                EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
            }
        }

        public static void Vmull_1(ArmEmitterContext context)
        {
            OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

            EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
        }

        public static void Vmull_I(ArmEmitterContext context)
        {
            OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;

            if (op.Polynomial)
            {
                if (op.Size == 0) // P8
                {
                    EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
                }
                else /* if (op.Size == 2) // P64 */
                {
                    Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
                    Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);

                    Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);

                    context.Copy(GetVecA32(op.Qd), res);
                }
            }
            else
            {
                EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
            }
        }

        public static void Vpadd_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
            }
            else
            {
                EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2));
            }
        }

        public static void Vpadd_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction);
            }
            else
            {
                EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U);
            }
        }

        public static void Vpadal(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            EmitVectorPairwiseTernaryLongOpI32(context, (op1, op2, op3) => context.Add(context.Add(op1, op2), op3), op.Opc != 1);
        }

        public static void Vpaddl(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            EmitVectorPairwiseLongOpI32(context, (op1, op2) => context.Add(op1, op2), (op.Opc & 1) == 0);
        }

        public static void Vpmax_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
            }
            else
            {
                EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2));
            }
        }

        public static void Vpmax_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction);
            }
            else
            {
                EmitVectorPairwiseOpI32(context, (op1, op2) =>
                {
                    Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2);
                    return context.ConditionalSelect(greater, op1, op2);
                }, !op.U);
            }
        }

        public static void Vpmin_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
            }
            else
            {
                EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2));
            }
        }

        public static void Vpmin_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction);
            }
            else
            {
                EmitVectorPairwiseOpI32(context, (op1, op2) =>
                {
                    Operand lesser = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2);
                    return context.ConditionalSelect(lesser, op1, op2);
                }, !op.U);
            }
        }

        public static void Vqadd(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            EmitSaturatingAddSubBinaryOp(context, add: true, !op.U);
        }

        public static void Vqdmulh(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
            int eSize = 8 << op.Size;

            EmitVectorBinaryOpI32(context, (op1, op2) =>
            {
                if (op.Size == 2)
                {
                    op1 = context.SignExtend32(OperandType.I64, op1);
                    op2 = context.SignExtend32(OperandType.I64, op2);
                }

                Operand res = context.Multiply(op1, op2);
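                // The architectural doubling is folded into the shift: (2*a*b) >> eSize == (a*b) >> (eSize - 1).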
                res = context.ShiftRightSI(res, Const(eSize - 1));
                res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);

                if (op.Size == 2)
                {
                    res = context.ConvertI64ToI32(res);
                }

                return res;
            }, signed: true);
        }

        public static void Vqmovn(ArmEmitterContext context)
        {
            OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;

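            // In this encoding the Q bit position belongs to the op field: it is set for the unsigned variant.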
            bool signed = !op.Q;

            EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signed, signed), signed);
        }

        public static void Vqmovun(ArmEmitterContext context)
        {
            OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp;

            EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
        }

        public static void Vqrdmulh(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
            int eSize = 8 << op.Size;

            EmitVectorBinaryOpI32(context, (op1, op2) =>
            {
                if (op.Size == 2)
                {
                    op1 = context.SignExtend32(OperandType.I64, op1);
                    op2 = context.SignExtend32(OperandType.I64, op2);
                }

                Operand res = context.Multiply(op1, op2);
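                // Round to nearest: adding 1 << (eSize - 2) before the (eSize - 1) shift matches the
                // architectural (2*a*b + (1 << (eSize - 1))) >> eSize.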
1263                  res = context.Add(res, Const(res.Type, 1L << (eSize - 2)));
1264                  res = context.ShiftRightSI(res, Const(eSize - 1));
1265                  res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
1266  
1267                  if (op.Size == 2)
1268                  {
1269                      res = context.ConvertI64ToI32(res);
1270                  }
1271  
1272                  return res;
1273              }, signed: true);
1274          }
1275  
1276          public static void Vqsub(ArmEmitterContext context)
1277          {
1278              OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
1279  
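            // Saturating subtract; the saturation helpers set the sticky QC flag when the result saturates.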
            EmitSaturatingAddSubBinaryOp(context, add: false, !op.U);
        }

        public static void Vrev(ArmEmitterContext context)
        {
            OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitVectorUnaryOpSimd32(context, (op1) =>
                {
                    Operand mask;
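                    // Byte-permutation masks for Pshufb: each mask byte selects the source
                    // byte to place at that position.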
                    switch (op.Size)
                    {
                        case 3:
                            // Rev64
                            switch (op.Opc)
                            {
                                case 0:
                                    mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L);
                                    return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
                                case 1:
                                    mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L);
                                    return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
                                case 2:
                                    return context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6)));
                            }
                            break;
                        case 2:
                            // Rev32
                            switch (op.Opc)
                            {
                                case 0:
                                    mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L);
                                    return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
                                case 1:
                                    mask = X86GetElements(context, 0x0d0c0f0e_09080b0aL, 0x05040706_01000302L);
                                    return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
                            }
                            break;
                        case 1:
                            // Rev16
                            mask = X86GetElements(context, 0x0e0f_0c0d_0a0b_0809L, 0x0607_0405_0203_0001L);
                            return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask);
                    }

                    throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
                });
            }
            else
            {
                EmitVectorUnaryOpZx32(context, (op1) =>
                {
                    switch (op.Opc)
                    {
                        case 0:
                            switch (op.Size) // Swap bytes.
                            {
                                case 1:
                                    return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1);
                                case 2:
                                case 3:
                                    return context.ByteSwap(op1);
                            }
                            break;
                        case 1:
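                            // Reverse the order of the 16-bit halfwords within each element.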
                            switch (op.Size)
                            {
                                case 2:
                                    return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)),
                                                             context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16)));
                                case 3:
                                    return context.BitwiseOr(
                                        context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)),
                                                          context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))),
                                        context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)),
                                                          context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16))));
                            }
                            break;
                        case 2:
                            // Swap upper and lower halves.
                            return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)),
                                                     context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32)));
                    }

                    throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable.
                });
            }
        }

        public static void Vrecpe(ArmEmitterContext context)
        {
            OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;

            if (op.F)
            {
                int sizeF = op.Size & 1;

                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
                {
                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
                }
                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                {
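                    // X86Rcpps is only an approximation and differs in precision from the
                    // Arm reciprocal estimate; acceptable under FastFP.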
                    EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) =>
                    {
                        return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRecipEstimateFpscr), op1);
                    });
                }
            }
            else
            {
                throw new NotImplementedException("Integer Vrecpe not currently implemented.");
            }
        }

        public static void Vrecps(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                bool single = (op.Size & 1) == 0;

                // (2 - (n*m))
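                // Note: unlike FPRecipStep, this does not special-case 0 * infinity (which
                // the Arm instruction defines as 2.0); acceptable under FastFP.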
                EmitVectorBinaryOpSimd32(context, (n, m) =>
                {
                    if (single)
                    {
                        Operand maskTwo = X86GetAllElements(context, 2f);

                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);

                        return context.AddIntrinsic(Intrinsic.X86Subps, maskTwo, res);
                    }
                    else
                    {
                        Operand maskTwo = X86GetAllElements(context, 2d);

                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);

                        return context.AddIntrinsic(Intrinsic.X86Subpd, maskTwo, res);
                    }
                });
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStep), op1, op2);
                });
            }
        }

        public static void Vrhadd(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            EmitVectorBinaryOpI32(context, (op1, op2) =>
            {
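                // Widen to 64 bits so the rounding add cannot overflow. The extension must
                // match the signedness of the operation, or negative inputs round incorrectly.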
                if (op.Size == 2)
                {
                    op1 = op.U ? context.ZeroExtend32(OperandType.I64, op1) : context.SignExtend32(OperandType.I64, op1);
                    op2 = op.U ? context.ZeroExtend32(OperandType.I64, op2) : context.SignExtend32(OperandType.I64, op2);
                }
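                // Rounding halving add: (op1 + op2 + 1) >> 1.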
                Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L));
                res = context.ShiftRightUI(res, Const(1));

                if (op.Size == 2)
                {
                    res = context.ConvertI64ToI32(res);
                }

                return res;
            }, !op.U);
        }

        public static void Vrsqrte(ArmEmitterContext context)
        {
            OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp;

            if (op.F)
            {
                int sizeF = op.Size & 1;

                if (Optimizations.FastFP && Optimizations.UseAdvSimd)
                {
                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
                }
                else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
                {
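                    // X86Rsqrtps is only an approximation and differs in precision from the
                    // Arm reciprocal square root estimate; acceptable under FastFP.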
                    EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) =>
                    {
                        return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRSqrtEstimateFpscr), op1);
                    });
                }
            }
            else
            {
                throw new NotImplementedException("Integer Vrsqrte not currently implemented.");
            }
        }

        public static void Vrsqrts(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
                bool single = (op.Size & 1) == 0;

                // (3 - (n*m)) / 2
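                // Note: unlike FPRSqrtStep, this does not special-case 0 * infinity (which
                // the Arm instruction defines as 1.5); acceptable under FastFP.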
                EmitVectorBinaryOpSimd32(context, (n, m) =>
                {
                    if (single)
                    {
                        Operand maskHalf = X86GetAllElements(context, 0.5f);
                        Operand maskThree = X86GetAllElements(context, 3f);

                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);

                        res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
                        return context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
                    }
                    else
                    {
                        Operand maskHalf = X86GetAllElements(context, 0.5d);
                        Operand maskThree = X86GetAllElements(context, 3d);

                        Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);

                        res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
                        return context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
                    }
                });
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStep), op1, op2);
                });
            }
        }

        public static void Vsel(ArmEmitterContext context)
        {
            OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp;

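            // VSEL copies one of the two source registers based on the condition flags; the
            // encoding provides EQ, GE, GT and VS, with inverse conditions handled by the
            // assembler swapping the source operands.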
            Operand condition = default;

            switch (op.Cc)
            {
                case OpCode32SimdSelMode.Eq:
                    condition = GetCondTrue(context, Condition.Eq);
                    break;
                case OpCode32SimdSelMode.Ge:
                    condition = GetCondTrue(context, Condition.Ge);
                    break;
                case OpCode32SimdSelMode.Gt:
                    condition = GetCondTrue(context, Condition.Gt);
                    break;
                case OpCode32SimdSelMode.Vs:
                    condition = GetCondTrue(context, Condition.Vs);
                    break;
            }

            EmitScalarBinaryOpI32(context, (op1, op2) =>
            {
                return context.ConditionalSelect(condition, op1, op2);
            });
        }

        public static void Vsqrt_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) =>
                {
                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1);
                });
            }
        }

        public static void Vsub_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Vsub_V(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
            }
            else if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
            }
            else
            {
                EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Vsub_I(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

                EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PsubInstruction[op.Size], op1, op2));
            }
            else
            {
                EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2));
            }
        }

        public static void Vsubl_I(ArmEmitterContext context)
        {
            OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;

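            // VSUBL: both sources are doubleword vectors whose elements are widened before
            // the subtraction produces a quadword result.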
            EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
        }

        public static void Vsubw_I(ArmEmitterContext context)
        {
            OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp;

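            // VSUBW: the first source is already a quadword vector; only the elements of the
            // second are widened.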
            EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U);
        }

        private static void EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

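            // Element sizes up to 32 bits are widened to 64 bits and clamped with EmitSatQ;
            // 64-bit elements use dedicated saturating helpers, since no wider type exists.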
            EmitVectorBinaryOpI32(context, (ne, me) =>
            {
                if (op.Size <= 2)
                {
                    if (op.Size == 2)
                    {
                        ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
                        me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
                    }

                    Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me);

                    res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed);

                    if (op.Size == 2)
                    {
                        res = context.ConvertI64ToI32(res);
                    }

                    return res;
                }
                else if (add) /* if (op.Size == 3) */
                {
                    return signed
                        ? EmitBinarySignedSatQAdd(context, ne, me)
                        : EmitBinaryUnsignedSatQAdd(context, ne, me);
                }
                else /* if (sub) */
                {
                    return signed
                        ? EmitBinarySignedSatQSub(context, ne, me)
                        : EmitBinaryUnsignedSatQSub(context, ne, me);
                }
            }, signed);
        }

        private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar)
        {
            IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;

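            // MaxNum/MinNum semantics: when exactly one operand is a quiet NaN, it is
            // replaced with the least preferred value (the opposite infinity) so that the
            // x86 max/min selects the other, numeric operand.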
            Operand genericEmit(Operand n, Operand m)
            {
                Operand nNum = context.Copy(n);
                Operand mNum = context.Copy(m);

                InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
                InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);

                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);

                    Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);

                    nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
                    mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);

                    return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
                }
                else /* if (sizeF == 1) */
                {
                    Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);

                    Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);

                    nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
                    mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);

                    return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
                }
            }

            if (scalar)
            {
                EmitScalarBinaryOpSimd32(context, genericEmit);
            }
            else
            {
                EmitVectorBinaryOpSimd32(context, genericEmit);
            }
        }
    }
}