/ src / ARMeilleure / Instructions / InstEmitSimdCvt.cs
InstEmitSimdCvt.cs
   1  using ARMeilleure.Decoders;
   2  using ARMeilleure.IntermediateRepresentation;
   3  using ARMeilleure.State;
   4  using ARMeilleure.Translation;
   5  using System;
   6  using System.Diagnostics;
   7  using System.Reflection;
   8  using static ARMeilleure.Instructions.InstEmitHelper;
   9  using static ARMeilleure.Instructions.InstEmitSimdHelper;
  10  using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  11  
  12  namespace ARMeilleure.Instructions
  13  {
  14      using Func1I = Func<Operand, Operand>;
  15  
  16      static partial class InstEmit
  17      {
  18          public static void Fcvt_S(ArmEmitterContext context)
  19          {
  20              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
  21  
  22              if (op.Size == 0 && op.Opc == 1) // Single -> Double.
  23              {
  24                  if (Optimizations.UseSse2)
  25                  {
  26                      Operand n = GetVec(op.Rn);
  27  
  28                      Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
  29  
  30                      context.Copy(GetVec(op.Rd), res);
  31                  }
  32                  else
  33                  {
  34                      Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
  35  
  36                      Operand res = context.ConvertToFP(OperandType.FP64, ne);
  37  
  38                      context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  39                  }
  40              }
  41              else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
  42              {
  43                  if (Optimizations.UseSse2)
  44                  {
  45                      Operand n = GetVec(op.Rn);
  46  
  47                      Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
  48  
  49                      context.Copy(GetVec(op.Rd), res);
  50                  }
  51                  else
  52                  {
  53                      Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
  54  
  55                      Operand res = context.ConvertToFP(OperandType.FP32, ne);
  56  
  57                      context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
  58                  }
  59              }
  60              else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
  61              {
  62                  if (Optimizations.UseF16c)
  63                  {
  64                      Debug.Assert(!Optimizations.ForceLegacySse);
  65  
  66                      Operand n = GetVec(op.Rn);
  67  
  68                      Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
  69                      res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
  70                      res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
  71  
  72                      context.Copy(GetVec(op.Rd), res);
  73                  }
  74                  else
  75                  {
  76                      Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
  77  
  78                      context.StoreToContext();
  79                      Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
  80                      context.LoadFromContext();
  81  
  82                      res = context.ZeroExtend16(OperandType.I64, res);
  83  
  84                      context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
  85                  }
  86              }
  87              else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
  88              {
  89                  if (Optimizations.UseF16c)
  90                  {
  91                      Debug.Assert(!Optimizations.ForceLegacySse);
  92  
  93                      Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
  94                      res = context.VectorZeroUpper96(res);
  95  
  96                      context.Copy(GetVec(op.Rd), res);
  97                  }
  98                  else
  99                  {
 100                      Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
 101  
 102                      context.StoreToContext();
 103                      Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
 104                      context.LoadFromContext();
 105  
 106                      context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 107                  }
 108              }
 109              else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
 110              {
 111                  if (Optimizations.UseF16c)
 112                  {
 113                      Debug.Assert(!Optimizations.ForceLegacySse);
 114  
 115                      Operand n = GetVec(op.Rn);
 116  
 117                      Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
 118                      res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
 119  
 120                      context.Copy(GetVec(op.Rd), res);
 121                  }
 122                  else
 123                  {
 124                      Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
 125  
 126                      context.StoreToContext();
 127                      Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
 128                      context.LoadFromContext();
 129  
 130                      res = context.ZeroExtend16(OperandType.I64, res);
 131  
 132                      context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
 133                  }
 134              }
 135              else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
 136              {
 137                  if (Optimizations.UseF16c)
 138                  {
 139                      Operand n = GetVec(op.Rn);
 140  
 141                      Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
 142                      res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
 143                      res = context.VectorZeroUpper64(res);
 144  
 145                      context.Copy(GetVec(op.Rd), res);
 146                  }
 147                  else
 148                  {
 149                      Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
 150  
 151                      context.StoreToContext();
 152                      Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
 153                      context.LoadFromContext();
 154  
 155                      context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 156                  }
 157              }
 158              else // Invalid encoding.
 159              {
 160                  Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
 161              }
 162          }
 163  
 164          public static void Fcvtas_Gp(ArmEmitterContext context)
 165          {
 166              if (Optimizations.UseAdvSimd)
 167              {
 168                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
 169              }
 170              else if (Optimizations.UseSse41)
 171              {
 172                  EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
 173              }
 174              else
 175              {
 176                  EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
 177              }
 178          }
 179  
 180          public static void Fcvtas_S(ArmEmitterContext context)
 181          {
 182              if (Optimizations.UseAdvSimd)
 183              {
 184                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
 185              }
 186              else if (Optimizations.UseSse41)
 187              {
 188                  EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
 189              }
 190              else
 191              {
 192                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
 193              }
 194          }
 195  
 196          public static void Fcvtas_V(ArmEmitterContext context)
 197          {
 198              if (Optimizations.UseAdvSimd)
 199              {
 200                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS);
 201              }
 202              else if (Optimizations.UseSse41)
 203              {
 204                  EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
 205              }
 206              else
 207              {
 208                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
 209              }
 210          }
 211  
 212          public static void Fcvtau_Gp(ArmEmitterContext context)
 213          {
 214              if (Optimizations.UseAdvSimd)
 215              {
 216                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
 217              }
 218              else if (Optimizations.UseSse41)
 219              {
 220                  EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
 221              }
 222              else
 223              {
 224                  EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
 225              }
 226          }
 227  
 228          public static void Fcvtau_S(ArmEmitterContext context)
 229          {
 230              if (Optimizations.UseAdvSimd)
 231              {
 232                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
 233              }
 234              else if (Optimizations.UseSse41)
 235              {
 236                  EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
 237              }
 238              else
 239              {
 240                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
 241              }
 242          }
 243  
 244          public static void Fcvtau_V(ArmEmitterContext context)
 245          {
 246              if (Optimizations.UseAdvSimd)
 247              {
 248                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
 249              }
 250              else if (Optimizations.UseSse41)
 251              {
 252                  EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
 253              }
 254              else
 255              {
 256                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
 257              }
 258          }
 259  
 260          public static void Fcvtl_V(ArmEmitterContext context)
 261          {
 262              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 263  
 264              int sizeF = op.Size & 1;
 265  
 266              if (Optimizations.UseAdvSimd)
 267              {
 268                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
 269              }
 270              else if (Optimizations.UseSse2 && sizeF == 1)
 271              {
 272                  Operand n = GetVec(op.Rn);
 273  
 274                  Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
 275                  res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
 276  
 277                  context.Copy(GetVec(op.Rd), res);
 278              }
 279              else if (Optimizations.UseF16c && sizeF == 0)
 280              {
 281                  Debug.Assert(!Optimizations.ForceLegacySse);
 282  
 283                  Operand n = GetVec(op.Rn);
 284  
 285                  Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
 286                  res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
 287  
 288                  context.Copy(GetVec(op.Rd), res);
 289              }
 290              else
 291              {
 292                  Operand res = context.VectorZero();
 293  
 294                  int elems = 4 >> sizeF;
 295  
 296                  int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
 297  
 298                  for (int index = 0; index < elems; index++)
 299                  {
 300                      if (sizeF == 0)
 301                      {
 302                          Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
 303  
 304                          context.StoreToContext();
 305                          Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
 306                          context.LoadFromContext();
 307  
 308                          res = context.VectorInsert(res, e, index);
 309                      }
 310                      else /* if (sizeF == 1) */
 311                      {
 312                          Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
 313  
 314                          Operand e = context.ConvertToFP(OperandType.FP64, ne);
 315  
 316                          res = context.VectorInsert(res, e, index);
 317                      }
 318                  }
 319  
 320                  context.Copy(GetVec(op.Rd), res);
 321              }
 322          }
 323  
 324          public static void Fcvtms_Gp(ArmEmitterContext context)
 325          {
 326              if (Optimizations.UseAdvSimd)
 327              {
 328                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
 329              }
 330              else if (Optimizations.UseSse41)
 331              {
 332                  EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
 333              }
 334              else
 335              {
 336                  EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
 337              }
 338          }
 339  
 340          public static void Fcvtms_V(ArmEmitterContext context)
 341          {
 342              if (Optimizations.UseAdvSimd)
 343              {
 344                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
 345              }
 346              else if (Optimizations.UseSse41)
 347              {
 348                  EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
 349              }
 350              else
 351              {
 352                  EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false);
 353              }
 354          }
 355  
 356          public static void Fcvtmu_Gp(ArmEmitterContext context)
 357          {
 358              if (Optimizations.UseAdvSimd)
 359              {
 360                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
 361              }
 362              else if (Optimizations.UseSse41)
 363              {
 364                  EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
 365              }
 366              else
 367              {
 368                  EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
 369              }
 370          }
 371  
 372          public static void Fcvtn_V(ArmEmitterContext context)
 373          {
 374              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 375  
 376              int sizeF = op.Size & 1;
 377  
 378              if (Optimizations.UseAdvSimd)
 379              {
 380                  InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
 381              }
 382              else if (Optimizations.UseSse2 && sizeF == 1)
 383              {
 384                  Operand d = GetVec(op.Rd);
 385  
 386                  Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
 387  
 388                  Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
 389                  nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
 390  
 391                  Operand res = context.VectorZeroUpper64(d);
 392                  res = context.AddIntrinsic(movInst, res, nInt);
 393  
 394                  context.Copy(d, res);
 395              }
 396              else if (Optimizations.UseF16c && sizeF == 0)
 397              {
 398                  Debug.Assert(!Optimizations.ForceLegacySse);
 399  
 400                  Operand d = GetVec(op.Rd);
 401                  Operand n = GetVec(op.Rn);
 402  
 403                  Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
 404  
 405                  Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
 406                  nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
 407  
 408                  Operand res = context.VectorZeroUpper64(d);
 409                  res = context.AddIntrinsic(movInst, res, nInt);
 410  
 411                  context.Copy(d, res);
 412              }
 413              else
 414              {
 415                  OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
 416  
 417                  int elems = 4 >> sizeF;
 418  
 419                  int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
 420  
 421                  Operand d = GetVec(op.Rd);
 422  
 423                  Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
 424  
 425                  for (int index = 0; index < elems; index++)
 426                  {
 427                      Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
 428  
 429                      if (sizeF == 0)
 430                      {
 431                          context.StoreToContext();
 432                          Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
 433                          context.LoadFromContext();
 434  
 435                          res = EmitVectorInsert(context, res, e, part + index, 1);
 436                      }
 437                      else /* if (sizeF == 1) */
 438                      {
 439                          Operand e = context.ConvertToFP(OperandType.FP32, ne);
 440  
 441                          res = context.VectorInsert(res, e, part + index);
 442                      }
 443                  }
 444  
 445                  context.Copy(d, res);
 446              }
 447          }
 448  
 449          public static void Fcvtns_Gp(ArmEmitterContext context)
 450          {
 451              if (Optimizations.UseAdvSimd)
 452              {
 453                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
 454              }
 455              else if (Optimizations.UseSse41)
 456              {
 457                  EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
 458              }
 459              else
 460              {
 461                  EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1));
 462              }
 463          }
 464  
 465          public static void Fcvtns_S(ArmEmitterContext context)
 466          {
 467              if (Optimizations.UseAdvSimd)
 468              {
 469                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
 470              }
 471              else if (Optimizations.UseSse41)
 472              {
 473                  EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
 474              }
 475              else
 476              {
 477                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
 478              }
 479          }
 480  
 481          public static void Fcvtns_V(ArmEmitterContext context)
 482          {
 483              if (Optimizations.UseAdvSimd)
 484              {
 485                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
 486              }
 487              else if (Optimizations.UseSse41)
 488              {
 489                  EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
 490              }
 491              else
 492              {
 493                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
 494              }
 495          }
 496  
 497          public static void Fcvtnu_S(ArmEmitterContext context)
 498          {
 499              if (Optimizations.UseAdvSimd)
 500              {
 501                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
 502              }
 503              else if (Optimizations.UseSse41)
 504              {
 505                  EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
 506              }
 507              else
 508              {
 509                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
 510              }
 511          }
 512  
 513          public static void Fcvtnu_V(ArmEmitterContext context)
 514          {
 515              if (Optimizations.UseAdvSimd)
 516              {
 517                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
 518              }
 519              else if (Optimizations.UseSse41)
 520              {
 521                  EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
 522              }
 523              else
 524              {
 525                  EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
 526              }
 527          }
 528  
 529          public static void Fcvtps_Gp(ArmEmitterContext context)
 530          {
 531              if (Optimizations.UseAdvSimd)
 532              {
 533                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
 534              }
 535              else if (Optimizations.UseSse41)
 536              {
 537                  EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
 538              }
 539              else
 540              {
 541                  EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
 542              }
 543          }
 544  
 545          public static void Fcvtpu_Gp(ArmEmitterContext context)
 546          {
 547              if (Optimizations.UseAdvSimd)
 548              {
 549                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
 550              }
 551              else if (Optimizations.UseSse41)
 552              {
 553                  EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
 554              }
 555              else
 556              {
 557                  EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
 558              }
 559          }
 560  
 561          public static void Fcvtzs_Gp(ArmEmitterContext context)
 562          {
 563              if (Optimizations.UseAdvSimd)
 564              {
 565                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
 566              }
 567              else if (Optimizations.UseSse41)
 568              {
 569                  EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
 570              }
 571              else
 572              {
 573                  EmitFcvt_s_Gp(context, (op1) => op1);
 574              }
 575          }
 576  
 577          public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
 578          {
 579              if (Optimizations.UseAdvSimd)
 580              {
 581                  OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 582  
 583                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
 584              }
 585              else if (Optimizations.UseSse41)
 586              {
 587                  EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
 588              }
 589              else
 590              {
 591                  EmitFcvtzs_Gp_Fixed(context);
 592              }
 593          }
 594  
 595          public static void Fcvtzs_S(ArmEmitterContext context)
 596          {
 597              if (Optimizations.UseAdvSimd)
 598              {
 599                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
 600              }
 601              else if (Optimizations.UseSse41)
 602              {
 603                  EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
 604              }
 605              else
 606              {
 607                  EmitFcvtz(context, signed: true, scalar: true);
 608              }
 609          }
 610  
 611          public static void Fcvtzs_V(ArmEmitterContext context)
 612          {
 613              if (Optimizations.UseAdvSimd)
 614              {
 615                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
 616              }
 617              else if (Optimizations.UseSse41)
 618              {
 619                  EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
 620              }
 621              else
 622              {
 623                  EmitFcvtz(context, signed: true, scalar: false);
 624              }
 625          }
 626  
 627          public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
 628          {
 629              if (Optimizations.UseAdvSimd)
 630              {
 631                  InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
 632              }
 633              else if (Optimizations.UseSse41)
 634              {
 635                  EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
 636              }
 637              else
 638              {
 639                  EmitFcvtz(context, signed: true, scalar: false);
 640              }
 641          }
 642  
 643          public static void Fcvtzu_Gp(ArmEmitterContext context)
 644          {
 645              if (Optimizations.UseAdvSimd)
 646              {
 647                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
 648              }
 649              else if (Optimizations.UseSse41)
 650              {
 651                  EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
 652              }
 653              else
 654              {
 655                  EmitFcvt_u_Gp(context, (op1) => op1);
 656              }
 657          }
 658  
 659          public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
 660          {
 661              if (Optimizations.UseAdvSimd)
 662              {
 663                  OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 664  
 665                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
 666              }
 667              else if (Optimizations.UseSse41)
 668              {
 669                  EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
 670              }
 671              else
 672              {
 673                  EmitFcvtzu_Gp_Fixed(context);
 674              }
 675          }
 676  
 677          public static void Fcvtzu_S(ArmEmitterContext context)
 678          {
 679              if (Optimizations.UseAdvSimd)
 680              {
 681                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
 682              }
 683              else if (Optimizations.UseSse41)
 684              {
 685                  EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
 686              }
 687              else
 688              {
 689                  EmitFcvtz(context, signed: false, scalar: true);
 690              }
 691          }
 692  
 693          public static void Fcvtzu_V(ArmEmitterContext context)
 694          {
 695              if (Optimizations.UseAdvSimd)
 696              {
 697                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
 698              }
 699              else if (Optimizations.UseSse41)
 700              {
 701                  EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
 702              }
 703              else
 704              {
 705                  EmitFcvtz(context, signed: false, scalar: false);
 706              }
 707          }
 708  
 709          public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
 710          {
 711              if (Optimizations.UseAdvSimd)
 712              {
 713                  InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
 714              }
 715              else if (Optimizations.UseSse41)
 716              {
 717                  EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
 718              }
 719              else
 720              {
 721                  EmitFcvtz(context, signed: false, scalar: false);
 722              }
 723          }
 724  
 725          public static void Scvtf_Gp(ArmEmitterContext context)
 726          {
 727              if (Optimizations.UseAdvSimd)
 728              {
 729                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
 730              }
 731              else
 732              {
 733                  OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 734  
 735                  Operand res = GetIntOrZR(context, op.Rn);
 736  
 737                  if (op.RegisterSize == RegisterSize.Int32)
 738                  {
 739                      res = context.SignExtend32(OperandType.I64, res);
 740                  }
 741  
 742                  res = EmitFPConvert(context, res, op.Size, signed: true);
 743  
 744                  context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 745              }
 746          }
 747  
 748          public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
 749          {
 750              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 751  
 752              if (Optimizations.UseAdvSimd)
 753              {
 754                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
 755              }
 756              else
 757              {
 758                  Operand res = GetIntOrZR(context, op.Rn);
 759  
 760                  if (op.RegisterSize == RegisterSize.Int32)
 761                  {
 762                      res = context.SignExtend32(OperandType.I64, res);
 763                  }
 764  
 765                  res = EmitFPConvert(context, res, op.Size, signed: true);
 766  
 767                  res = EmitI2fFBitsMul(context, res, op.FBits);
 768  
 769                  context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 770              }
 771          }
 772  
 773          public static void Scvtf_S(ArmEmitterContext context)
 774          {
 775              if (Optimizations.UseAdvSimd)
 776              {
 777                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
 778              }
 779              else if (Optimizations.UseSse2)
 780              {
 781                  EmitSse2ScvtfOp(context, scalar: true);
 782              }
 783              else
 784              {
 785                  EmitCvtf(context, signed: true, scalar: true);
 786              }
 787          }
 788  
 789          public static void Scvtf_S_Fixed(ArmEmitterContext context)
 790          {
 791              if (Optimizations.UseAdvSimd)
 792              {
 793                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
 794              }
 795              else if (Optimizations.UseSse2)
 796              {
 797                  EmitSse2ScvtfOp(context, scalar: true);
 798              }
 799              else
 800              {
 801                  EmitCvtf(context, signed: true, scalar: true);
 802              }
 803          }
 804  
 805          public static void Scvtf_V(ArmEmitterContext context)
 806          {
 807              if (Optimizations.UseAdvSimd)
 808              {
 809                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
 810              }
 811              else if (Optimizations.UseSse2)
 812              {
 813                  EmitSse2ScvtfOp(context, scalar: false);
 814              }
 815              else
 816              {
 817                  EmitCvtf(context, signed: true, scalar: false);
 818              }
 819          }
 820  
 821          public static void Scvtf_V_Fixed(ArmEmitterContext context)
 822          {
 823              if (Optimizations.UseAdvSimd)
 824              {
 825                  InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
 826              }
 827              else if (Optimizations.UseSse2)
 828              {
 829                  EmitSse2ScvtfOp(context, scalar: false);
 830              }
 831              else
 832              {
 833                  EmitCvtf(context, signed: true, scalar: false);
 834              }
 835          }
 836  
 837          public static void Ucvtf_Gp(ArmEmitterContext context)
 838          {
 839              if (Optimizations.UseAdvSimd)
 840              {
 841                  InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
 842              }
 843              else
 844              {
 845                  OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 846  
 847                  Operand res = GetIntOrZR(context, op.Rn);
 848  
 849                  res = EmitFPConvert(context, res, op.Size, signed: false);
 850  
 851                  context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 852              }
 853          }
 854  
 855          public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
 856          {
 857              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
 858  
 859              if (Optimizations.UseAdvSimd)
 860              {
 861                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
 862              }
 863              else
 864              {
 865                  Operand res = GetIntOrZR(context, op.Rn);
 866  
 867                  res = EmitFPConvert(context, res, op.Size, signed: false);
 868  
 869                  res = EmitI2fFBitsMul(context, res, op.FBits);
 870  
 871                  context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
 872              }
 873          }
 874  
 875          public static void Ucvtf_S(ArmEmitterContext context)
 876          {
 877              if (Optimizations.UseAdvSimd)
 878              {
 879                  InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
 880              }
 881              else if (Optimizations.UseSse2)
 882              {
 883                  EmitSse2UcvtfOp(context, scalar: true);
 884              }
 885              else
 886              {
 887                  EmitCvtf(context, signed: false, scalar: true);
 888              }
 889          }
 890  
 891          public static void Ucvtf_S_Fixed(ArmEmitterContext context)
 892          {
 893              if (Optimizations.UseAdvSimd)
 894              {
 895                  InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
 896              }
 897              else if (Optimizations.UseSse2)
 898              {
 899                  EmitSse2UcvtfOp(context, scalar: true);
 900              }
 901              else
 902              {
 903                  EmitCvtf(context, signed: false, scalar: true);
 904              }
 905          }
 906  
 907          public static void Ucvtf_V(ArmEmitterContext context)
 908          {
 909              if (Optimizations.UseAdvSimd)
 910              {
 911                  InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
 912              }
 913              else if (Optimizations.UseSse2)
 914              {
 915                  EmitSse2UcvtfOp(context, scalar: false);
 916              }
 917              else
 918              {
 919                  EmitCvtf(context, signed: false, scalar: false);
 920              }
 921          }
 922  
 923          public static void Ucvtf_V_Fixed(ArmEmitterContext context)
 924          {
 925              if (Optimizations.UseAdvSimd)
 926              {
 927                  InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
 928              }
 929              else if (Optimizations.UseSse2)
 930              {
 931                  EmitSse2UcvtfOp(context, scalar: false);
 932              }
 933              else
 934              {
 935                  EmitCvtf(context, signed: false, scalar: false);
 936              }
 937          }
 938  
 939          private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
 940          {
 941              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 942  
 943              Operand res = context.VectorZero();
 944  
 945              Operand n = GetVec(op.Rn);
 946  
 947              int sizeF = op.Size & 1;
 948              int sizeI = sizeF + 2;
 949  
 950              OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
 951  
 952              int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
 953  
 954              for (int index = 0; index < elems; index++)
 955              {
 956                  Operand ne = context.VectorExtract(type, n, index);
 957  
 958                  Operand e = emit(ne);
 959  
 960                  if (sizeF == 0)
 961                  {
 962                      MethodInfo info = signed
 963                          ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
 964                          : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
 965  
 966                      e = context.Call(info, e);
 967  
 968                      e = context.ZeroExtend32(OperandType.I64, e);
 969                  }
 970                  else /* if (sizeF == 1) */
 971                  {
 972                      MethodInfo info = signed
 973                          ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
 974                          : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
 975  
 976                      e = context.Call(info, e);
 977                  }
 978  
 979                  res = EmitVectorInsert(context, res, e, index, sizeI);
 980              }
 981  
 982              context.Copy(GetVec(op.Rd), res);
 983          }
 984  
 985          private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
 986          {
 987              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
 988  
 989              Operand res = context.VectorZero();
 990  
 991              Operand n = GetVec(op.Rn);
 992  
 993              int sizeF = op.Size & 1;
 994              int sizeI = sizeF + 2;
 995  
 996              OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
 997  
 998              int fBits = GetFBits(context);
 999  
1000              int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
1001  
1002              for (int index = 0; index < elems; index++)
1003              {
1004                  Operand ne = context.VectorExtract(type, n, index);
1005  
1006                  Operand e = EmitF2iFBitsMul(context, ne, fBits);
1007  
1008                  if (sizeF == 0)
1009                  {
1010                      MethodInfo info = signed
1011                          ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
1012                          : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
1013  
1014                      e = context.Call(info, e);
1015  
1016                      e = context.ZeroExtend32(OperandType.I64, e);
1017                  }
1018                  else /* if (sizeF == 1) */
1019                  {
1020                      MethodInfo info = signed
1021                          ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
1022                          : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
1023  
1024                      e = context.Call(info, e);
1025                  }
1026  
1027                  res = EmitVectorInsert(context, res, e, index, sizeI);
1028              }
1029  
1030              context.Copy(GetVec(op.Rd), res);
1031          }
1032  
1033          private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
1034          {
1035              EmitFcvt___Gp(context, emit, signed: true);
1036          }
1037  
1038          private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
1039          {
1040              EmitFcvt___Gp(context, emit, signed: false);
1041          }
1042  
1043          private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
1044          {
1045              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
1046  
1047              OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
1048  
1049              Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
1050  
1051              Operand res = signed
1052                  ? EmitScalarFcvts(context, emit(ne), 0)
1053                  : EmitScalarFcvtu(context, emit(ne), 0);
1054  
1055              SetIntOrZR(context, op.Rd, res);
1056          }
1057  
1058          private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
1059          {
1060              EmitFcvtz__Gp_Fixed(context, signed: true);
1061          }
1062  
1063          private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
1064          {
1065              EmitFcvtz__Gp_Fixed(context, signed: false);
1066          }
1067  
1068          private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
1069          {
1070              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
1071  
1072              OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
1073  
1074              Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
1075  
1076              Operand res = signed
1077                  ? EmitScalarFcvts(context, ne, op.FBits)
1078                  : EmitScalarFcvtu(context, ne, op.FBits);
1079  
1080              SetIntOrZR(context, op.Rd, res);
1081          }
1082  
1083          private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
1084          {
1085              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1086  
1087              Operand res = context.VectorZero();
1088  
1089              int sizeF = op.Size & 1;
1090              int sizeI = sizeF + 2;
1091  
1092              int fBits = GetFBits(context);
1093  
1094              int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
1095  
1096              for (int index = 0; index < elems; index++)
1097              {
1098                  Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
1099  
1100                  Operand e = EmitFPConvert(context, ne, sizeF, signed);
1101  
1102                  e = EmitI2fFBitsMul(context, e, fBits);
1103  
1104                  res = context.VectorInsert(res, e, index);
1105              }
1106  
1107              context.Copy(GetVec(op.Rd), res);
1108          }
1109  
1110          private static int GetFBits(ArmEmitterContext context)
1111          {
1112              if (context.CurrOp is OpCodeSimdShImm op)
1113              {
1114                  return GetImmShr(op);
1115              }
1116  
1117              return 0;
1118          }
1119  
1120          private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
1121          {
1122              Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
1123              Debug.Assert((uint)size < 2);
1124  
1125              OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
1126  
1127              if (signed)
1128              {
1129                  return context.ConvertToFP(type, value);
1130              }
1131              else
1132              {
1133                  return context.ConvertToFPUI(type, value);
1134              }
1135          }
1136  
1137          private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
1138          {
1139              Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
1140  
1141              value = EmitF2iFBitsMul(context, value, fBits);
1142  
1143              MethodInfo info;
1144  
1145              if (context.CurrOp.RegisterSize == RegisterSize.Int32)
1146              {
1147                  info = value.Type == OperandType.FP32
1148                      ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
1149                      : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
1150              }
1151              else
1152              {
1153                  info = value.Type == OperandType.FP32
1154                      ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
1155                      : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
1156              }
1157  
1158              return context.Call(info, value);
1159          }
1160  
1161          private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
1162          {
1163              Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
1164  
1165              value = EmitF2iFBitsMul(context, value, fBits);
1166  
1167              MethodInfo info;
1168  
1169              if (context.CurrOp.RegisterSize == RegisterSize.Int32)
1170              {
1171                  info = value.Type == OperandType.FP32
1172                      ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
1173                      : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
1174              }
1175              else
1176              {
1177                  info = value.Type == OperandType.FP32
1178                      ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
1179                      : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
1180              }
1181  
1182              return context.Call(info, value);
1183          }
1184  
1185          private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
1186          {
1187              Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
1188  
1189              if (fBits == 0)
1190              {
1191                  return value;
1192              }
1193  
1194              if (value.Type == OperandType.FP32)
1195              {
1196                  return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
1197              }
1198              else /* if (value.Type == OperandType.FP64) */
1199              {
1200                  return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
1201              }
1202          }
1203  
1204          private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
1205          {
1206              Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
1207  
1208              if (fBits == 0)
1209              {
1210                  return value;
1211              }
1212  
1213              if (value.Type == OperandType.FP32)
1214              {
1215                  return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
1216              }
1217              else /* if (value.Type == OperandType.FP64) */
1218              {
1219                  return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
1220              }
1221          }
1222  
1223          public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
1224          {
1225              Debug.Assert(opF.Type == OperandType.V128);
1226  
1227              Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, opF); // opFL
1228              Operand res = context.VectorCreateScalar(longL);
1229  
1230              if (!scalar)
1231              {
1232                  Operand opFH = context.AddIntrinsic(Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
1233                  Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, opFH);
1234                  Operand resH = context.VectorCreateScalar(longH);
1235                  res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, resH);
1236              }
1237  
1238              return res;
1239          }
1240  
1241          private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
1242          {
1243              Debug.Assert(op.Type == OperandType.V128);
1244  
1245              Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
1246              Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
1247  
1248              if (!scalar)
1249              {
1250                  Operand opH = context.AddIntrinsic(Intrinsic.X86Movhlps, res, op);    // res doesn't matter.
1251                  Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
1252                  Operand resH = context.AddIntrinsic(Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
1253                  res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, resH);
1254              }
1255  
1256              return res;
1257          }
1258  
1259          private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
1260          {
1261              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1262  
1263              Operand n = GetVec(op.Rn);
1264  
1265              // sizeF == ((OpCodeSimdShImm)op).Size - 2
1266              int sizeF = op.Size & 1;
1267  
1268              if (sizeF == 0)
1269              {
1270                  Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
1271  
1272                  if (op is OpCodeSimdShImm fixedOp)
1273                  {
1274                      int fBits = GetImmShr(fixedOp);
1275  
1276                      // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
1277                      int fpScaled = 0x3F800000 - fBits * 0x800000;
1278  
1279                      Operand fpScaledMask = scalar
1280                          ? X86GetScalar(context, fpScaled)
1281                          : X86GetAllElements(context, fpScaled);
1282  
1283                      res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
1284                  }
1285  
1286                  if (scalar)
1287                  {
1288                      res = context.VectorZeroUpper96(res);
1289                  }
1290                  else if (op.RegisterSize == RegisterSize.Simd64)
1291                  {
1292                      res = context.VectorZeroUpper64(res);
1293                  }
1294  
1295                  context.Copy(GetVec(op.Rd), res);
1296              }
1297              else /* if (sizeF == 1) */
1298              {
1299                  Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
1300  
1301                  if (op is OpCodeSimdShImm fixedOp)
1302                  {
1303                      int fBits = GetImmShr(fixedOp);
1304  
1305                      // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
1306                      long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
1307  
1308                      Operand fpScaledMask = scalar
1309                          ? X86GetScalar(context, fpScaled)
1310                          : X86GetAllElements(context, fpScaled);
1311  
1312                      res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
1313                  }
1314  
1315                  if (scalar)
1316                  {
1317                      res = context.VectorZeroUpper64(res);
1318                  }
1319  
1320                  context.Copy(GetVec(op.Rd), res);
1321              }
1322          }
1323  
1324          private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
1325          {
1326              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1327  
1328              Operand n = GetVec(op.Rn);
1329  
1330              // sizeF == ((OpCodeSimdShImm)op).Size - 2
1331              int sizeF = op.Size & 1;
1332  
1333              if (sizeF == 0)
1334              {
1335                  Operand mask = scalar // 65536.000f (1 << 16)
1336                      ? X86GetScalar(context, 0x47800000)
1337                      : X86GetAllElements(context, 0x47800000);
1338  
1339                  Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
1340                  res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
1341                  res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
1342  
1343                  Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
1344                  res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
1345                  res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
1346  
1347                  res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
1348  
1349                  if (op is OpCodeSimdShImm fixedOp)
1350                  {
1351                      int fBits = GetImmShr(fixedOp);
1352  
1353                      // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
1354                      int fpScaled = 0x3F800000 - fBits * 0x800000;
1355  
1356                      Operand fpScaledMask = scalar
1357                          ? X86GetScalar(context, fpScaled)
1358                          : X86GetAllElements(context, fpScaled);
1359  
1360                      res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
1361                  }
1362  
1363                  if (scalar)
1364                  {
1365                      res = context.VectorZeroUpper96(res);
1366                  }
1367                  else if (op.RegisterSize == RegisterSize.Simd64)
1368                  {
1369                      res = context.VectorZeroUpper64(res);
1370                  }
1371  
1372                  context.Copy(GetVec(op.Rd), res);
1373              }
1374              else /* if (sizeF == 1) */
1375              {
1376                  Operand mask = scalar // 4294967296.0000000d (1L << 32)
1377                      ? X86GetScalar(context, 0x41F0000000000000L)
1378                      : X86GetAllElements(context, 0x41F0000000000000L);
1379  
1380                  Operand res = context.AddIntrinsic(Intrinsic.X86Psrlq, n, Const(32));
1381                  res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
1382                  res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, mask);
1383  
1384                  Operand res2 = context.AddIntrinsic(Intrinsic.X86Psllq, n, Const(32));
1385                  res2 = context.AddIntrinsic(Intrinsic.X86Psrlq, res2, Const(32));
1386                  res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
1387  
1388                  res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
1389  
1390                  if (op is OpCodeSimdShImm fixedOp)
1391                  {
1392                      int fBits = GetImmShr(fixedOp);
1393  
1394                      // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
1395                      long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
1396  
1397                      Operand fpScaledMask = scalar
1398                          ? X86GetScalar(context, fpScaled)
1399                          : X86GetAllElements(context, fpScaled);
1400  
1401                      res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
1402                  }
1403  
1404                  if (scalar)
1405                  {
1406                      res = context.VectorZeroUpper64(res);
1407                  }
1408  
1409                  context.Copy(GetVec(op.Rd), res);
1410              }
1411          }
1412  
1413          private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
1414          {
1415              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1416  
1417              Operand n = GetVec(op.Rn);
1418  
1419              // sizeF == ((OpCodeSimdShImm)op).Size - 2
1420              int sizeF = op.Size & 1;
1421  
1422              if (sizeF == 0)
1423              {
1424                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
1425                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1426  
1427                  if (op is OpCodeSimdShImm fixedOp)
1428                  {
1429                      int fBits = GetImmShr(fixedOp);
1430  
1431                      // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
1432                      int fpScaled = 0x3F800000 + fBits * 0x800000;
1433  
1434                      Operand fpScaledMask = scalar
1435                          ? X86GetScalar(context, fpScaled)
1436                          : X86GetAllElements(context, fpScaled);
1437  
1438                      nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
1439                  }
1440  
1441                  if (roundMode != FPRoundingMode.ToNearestAway)
1442                  {
1443                      nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
1444                  }
1445                  else
1446                  {
1447                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
1448                  }
1449  
1450                  Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
1451  
1452                  Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
1453                      ? X86GetScalar(context, 0x4F000000)
1454                      : X86GetAllElements(context, 0x4F000000);
1455  
1456                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1457  
1458                  Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
1459  
1460                  if (scalar)
1461                  {
1462                      dRes = context.VectorZeroUpper96(dRes);
1463                  }
1464                  else if (op.RegisterSize == RegisterSize.Simd64)
1465                  {
1466                      dRes = context.VectorZeroUpper64(dRes);
1467                  }
1468  
1469                  context.Copy(GetVec(op.Rd), dRes);
1470              }
1471              else /* if (sizeF == 1) */
1472              {
1473                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
1474                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1475  
1476                  if (op is OpCodeSimdShImm fixedOp)
1477                  {
1478                      int fBits = GetImmShr(fixedOp);
1479  
1480                      // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
1481                      long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
1482  
1483                      Operand fpScaledMask = scalar
1484                          ? X86GetScalar(context, fpScaled)
1485                          : X86GetAllElements(context, fpScaled);
1486  
1487                      nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
1488                  }
1489  
1490                  if (roundMode != FPRoundingMode.ToNearestAway)
1491                  {
1492                      nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
1493                  }
1494                  else
1495                  {
1496                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
1497                  }
1498  
1499                  Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
1500  
1501                  Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
1502                      ? X86GetScalar(context, 0x43E0000000000000L)
1503                      : X86GetAllElements(context, 0x43E0000000000000L);
1504  
1505                  nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1506  
1507                  Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
1508  
1509                  if (scalar)
1510                  {
1511                      dRes = context.VectorZeroUpper64(dRes);
1512                  }
1513  
1514                  context.Copy(GetVec(op.Rd), dRes);
1515              }
1516          }
1517  
1518          private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
1519          {
                  // Emits the SSE4.1 sequence for a floating-point -> unsigned integer conversion that reads
                  // vector register Rn and writes vector register Rd of the current opcode.
                  //   roundMode - rounding applied to the value before it is converted to an integer.
                  //   scalar    - true: constants are built with X86GetScalar and the upper part of the result
                  //               is zeroed; false: constants are built with X86GetAllElements.
                  // If the current opcode is an OpCodeSimdShImm, the input is first multiplied by 2^fBits
                  // (fixed-point form).
1520              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
1521  
1522              Operand n = GetVec(op.Rn);
1523  
1524              // sizeF == ((OpCodeSimdShImm)op).Size - 2
                  // sizeF == 0 takes the single-precision (ps) path below, anything else the double-precision (pd) path.
1525              int sizeF = op.Size & 1;
1526  
1527              if (sizeF == 0)
1528              {
                      // Compare n with itself (OrderedQ) and AND the mask back into n: a lane is ordered with
                      // itself only when it is not NaN, so NaN lanes end up with all bits clear (+0.0).
1529                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
1530                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1531  
1532                  if (op is OpCodeSimdShImm fixedOp)
1533                  {
1534                      int fBits = GetImmShr(fixedOp);
1535  
1536                      // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
                          // 0x3F800000 is the bit pattern of 1.0f and 0x800000 is 1 << 23, so each unit of fBits
                          // bumps the exponent field by one.
1537                      int fpScaled = 0x3F800000 + fBits * 0x800000;
1538  
1539                      Operand fpScaledMask = scalar
1540                          ? X86GetScalar(context, fpScaled)
1541                          : X86GetAllElements(context, fpScaled);
1542  
1543                      nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
1544                  }
1545  
                      // Every mode except ToNearestAway is rounded by Roundps with the control value returned by
                      // X86GetRoundControl; ToNearestAway goes through a dedicated helper instead.
1546                  if (roundMode != FPRoundingMode.ToNearestAway)
1547                  {
1548                      nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
1549                  }
1550                  else
1551                  {
1552                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
1553                  }
1554  
1555                  Operand zero = context.VectorZero();
1556  
                      // The mask is set only where !(nRes <= 0), so the AND clears every lane that is negative
                      // or zero: an unsigned result cannot be below zero.
1557                  Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1558                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1559  
1560                  Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
1561                      ? X86GetScalar(context, 0x4F000000)
1562                      : X86GetAllElements(context, 0x4F000000);
1563  
                      // First pass: convert the clamped value directly.
                      // NOTE(review): lanes >= 2^31 are expected to come back as 0x80000000 (the x86 "integer
                      // indefinite" value); the rest of the sequence depends on that - confirm against the SDM.
1564                  Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
1565  
                      // Second pass: convert (value - 2^31), again clearing lanes that are not above zero, so
                      // that only inputs above 2^31 contribute here.
1566                  nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
1567  
1568                  nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1569                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1570  
1571                  Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
1572  
                      // Mask of lanes whose remainder is still >= 2^31, i.e. the input does not fit in 32 bits.
1573                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1574  
                      // XOR with that mask inverts the second-pass result on overflowing lanes (0x80000000 becomes
                      // 0x7FFFFFFF under the assumption above); adding the first-pass result then gives 0xFFFFFFFF.
                      // On all other lanes the mask is zero and the sum of both passes is the unsigned value.
1575                  Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
1576                  dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
1577  
                      // VectorZeroUpper96 / VectorZeroUpper64 presumably clear the part of the 128-bit result that
                      // the scalar form and the 64-bit vector form do not produce - helpers are defined elsewhere.
1578                  if (scalar)
1579                  {
1580                      dRes = context.VectorZeroUpper96(dRes);
1581                  }
1582                  else if (op.RegisterSize == RegisterSize.Simd64)
1583                  {
1584                      dRes = context.VectorZeroUpper64(dRes);
1585                  }
1586  
1587                  context.Copy(GetVec(op.Rd), dRes);
1588              }
1589              else /* if (sizeF == 1) */
1590              {
                      // Double-precision variant of the sequence above: same steps with the pd intrinsics,
                      // 2^63 as the limit and 64-bit integer lanes.
                      // NaN lanes are cleared to +0.0 (a lane is ordered with itself only when it is not NaN).
1591                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
1592                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1593  
1594                  if (op is OpCodeSimdShImm fixedOp)
1595                  {
1596                      int fBits = GetImmShr(fixedOp);
1597  
1598                      // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
                          // 0x3FF0000000000000 is the bit pattern of 1.0d and 0x10000000000000 is 1 << 52, so each
                          // unit of fBits bumps the exponent field by one.
1599                      long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
1600  
1601                      Operand fpScaledMask = scalar
1602                          ? X86GetScalar(context, fpScaled)
1603                          : X86GetAllElements(context, fpScaled);
1604  
1605                      nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
1606                  }
1607  
                      // Roundpd handles every mode except ToNearestAway, which uses the dedicated helper.
1608                  if (roundMode != FPRoundingMode.ToNearestAway)
1609                  {
1610                      nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
1611                  }
1612                  else
1613                  {
1614                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar);
1615                  }
1616  
1617                  Operand zero = context.VectorZero();
1618  
                      // Clear every lane that is negative or zero (mask is set only where !(nRes <= 0)).
1619                  Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1620                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1621  
1622                  Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
1623                      ? X86GetScalar(context, 0x43E0000000000000L)
1624                      : X86GetAllElements(context, 0x43E0000000000000L);
1625  
                      // First pass. EmitSse2CvtDoubleToInt64OpF is defined elsewhere in this file; presumably it
                      // converts each double lane to a signed 64-bit integer, yielding 0x8000000000000000 when
                      // the value is out of range - TODO confirm.
1626                  Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
1627  
                      // Second pass on (value - 2^63), keeping only lanes that are still above zero.
1628                  nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
1629  
1630                  nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1631                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1632  
1633                  Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
1634  
                      // Mask of lanes whose remainder is still >= 2^63 (input does not fit in 64 unsigned bits).
1635                  nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1636  
                      // Invert the second-pass result on overflowing lanes, then add both passes together.
1637                  Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
1638                  dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
1639  
1640                  if (scalar)
1641                  {
1642                      dRes = context.VectorZeroUpper64(dRes);
1643                  }
1644  
1645                  context.Copy(GetVec(op.Rd), dRes);
1646              }
1647          }
1648  
1649          private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
1650          {
                  // Emits the SSE4.1 sequence for a floating-point -> signed integer conversion that reads
                  // vector register Rn and stores an integer result to Rd through SetIntOrZR.
                  //   roundMode - rounding applied to the value before it is converted to an integer.
                  //   isFixed   - true: the input is first multiplied by 2^op.FBits (fixed-point form).
                  // op.Size == 0 takes the single-precision (ss) path, otherwise the double-precision (sd) path;
                  // op.RegisterSize selects a 32-bit or 64-bit integer result.
1651              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
1652  
1653              Operand n = GetVec(op.Rn);
1654  
1655              if (op.Size == 0)
1656              {
                      // Compare n with itself (OrderedQ) and AND the mask back into n: the value is ordered with
                      // itself only when it is not NaN, so a NaN input ends up with all bits clear (+0.0).
1657                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
1658                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1659  
1660                  if (isFixed)
1661                  {
1662                      // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
                          // 0x3F800000 is the bit pattern of 1.0f and 0x800000 is 1 << 23, so each unit of FBits
                          // bumps the exponent field by one.
1663                      int fpScaled = 0x3F800000 + op.FBits * 0x800000;
1664  
1665                      Operand fpScaledMask = X86GetScalar(context, fpScaled);
1666  
1667                      nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
1668                  }
1669  
                      // Roundss handles every mode except ToNearestAway, which uses the dedicated helper.
1670                  if (roundMode != FPRoundingMode.ToNearestAway)
1671                  {
1672                      nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
1673                  }
1674                  else
1675                  {
1676                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
1677                  }
1678  
                      // Convert the rounded value to a 32-bit or 64-bit integer operand.
                      // NOTE(review): an out-of-range input is expected to produce the minimum integer
                      // (0x80...0, x86 "integer indefinite"); the fix-up below depends on it - confirm.
1679                  Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
1680                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
1681                      : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
1682  
1683                  int fpMaxVal = op.RegisterSize == RegisterSize.Int32
1684                      ? 0x4F000000  // 2.14748365E9f (2147483648)
1685                      : 0x5F000000; // 9.223372E18f  (9223372036854775808)
1686  
1687                  Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
1688  
                      // Mask is set when the rounded value is not less than the limit (positive overflow).
1689                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1690  
                      // NOTE(review): X86Cvtsi2si is used here to obtain the compare mask as a 32-bit integer
                      // operand (0 or all ones) - confirm the intrinsic's mapping in the code generator.
1691                  Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
1692  
                      // Widen the 32-bit mask so that it matches a 64-bit result operand.
1693                  if (op.RegisterSize == RegisterSize.Int64)
1694                  {
1695                      nInt = context.SignExtend32(OperandType.I64, nInt);
1696                  }
1697  
                      // XOR with the mask inverts the converted value on positive overflow (minimum integer
                      // becomes maximum integer under the assumption above) and leaves it untouched otherwise.
1698                  Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
1699  
1700                  SetIntOrZR(context, op.Rd, dRes);
1701              }
1702              else /* if (op.Size == 1) */
1703              {
                      // Double-precision variant of the sequence above (sd intrinsics).
                      // A NaN input is cleared to +0.0 (the value is ordered with itself only when not NaN).
1704                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
1705                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1706  
1707                  if (isFixed)
1708                  {
1709                      // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
                          // 0x3FF0000000000000 is the bit pattern of 1.0d and 0x10000000000000 is 1 << 52, so each
                          // unit of FBits bumps the exponent field by one.
1710                      long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
1711  
1712                      Operand fpScaledMask = X86GetScalar(context, fpScaled);
1713  
1714                      nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
1715                  }
1716  
                      // Roundsd handles every mode except ToNearestAway, which uses the dedicated helper.
1717                  if (roundMode != FPRoundingMode.ToNearestAway)
1718                  {
1719                      nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
1720                  }
1721                  else
1722                  {
1723                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
1724                  }
1725  
                      // Convert to a 32-bit or 64-bit integer operand (same out-of-range assumption as the
                      // single-precision path: the minimum integer is expected on overflow - confirm).
1726                  Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
1727                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
1728                      : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
1729  
1730                  long fpMaxVal = op.RegisterSize == RegisterSize.Int32
1731                      ? 0x41E0000000000000L  // 2147483648.0000000d    (2147483648)
1732                      : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
1733  
1734                  Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
1735  
                      // Mask is set when the rounded value is not less than the limit (positive overflow).
1736                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1737  
                      // NOTE(review): X86Cvtsi2si is used here to obtain the compare mask as a 64-bit integer
                      // operand (0 or all ones) - confirm the intrinsic's mapping in the code generator.
1738                  Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
1739  
                      // Narrow the 64-bit mask so that it matches a 32-bit result operand.
1740                  if (op.RegisterSize == RegisterSize.Int32)
1741                  {
1742                      nLong = context.ConvertI64ToI32(nLong);
1743                  }
1744  
                      // XOR with the mask inverts the converted value on positive overflow and leaves it
                      // untouched otherwise.
1745                  Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
1746  
1747                  SetIntOrZR(context, op.Rd, dRes);
1748              }
1749          }
1750  
1751          private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
1752          {
                  // Emits the SSE4.1 sequence for a floating-point -> unsigned integer conversion that reads
                  // vector register Rn and stores an integer result to Rd through SetIntOrZR.
                  //   roundMode - rounding applied to the value before it is converted to an integer.
                  //   isFixed   - true: the input is first multiplied by 2^op.FBits (fixed-point form).
                  // op.Size == 0 takes the single-precision (ss) path, otherwise the double-precision (sd) path;
                  // op.RegisterSize selects a 32-bit or 64-bit integer result.
                  // Only signed conversions are emitted, so the unsigned result is assembled from two passes:
                  // one on the value itself and one on (value - limit), where limit is 2^31 or 2^63.
1753              OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
1754  
1755              Operand n = GetVec(op.Rn);
1756  
1757              if (op.Size == 0)
1758              {
                      // Compare n with itself (OrderedQ) and AND the mask back into n: the value is ordered with
                      // itself only when it is not NaN, so a NaN input ends up with all bits clear (+0.0).
1759                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
1760                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1761  
1762                  if (isFixed)
1763                  {
1764                      // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
                          // 0x3F800000 is the bit pattern of 1.0f and 0x800000 is 1 << 23, so each unit of FBits
                          // bumps the exponent field by one.
1765                      int fpScaled = 0x3F800000 + op.FBits * 0x800000;
1766  
1767                      Operand fpScaledMask = X86GetScalar(context, fpScaled);
1768  
1769                      nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
1770                  }
1771  
                      // Roundss handles every mode except ToNearestAway, which uses the dedicated helper.
1772                  if (roundMode != FPRoundingMode.ToNearestAway)
1773                  {
1774                      nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
1775                  }
1776                  else
1777                  {
1778                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
1779                  }
1780  
1781                  Operand zero = context.VectorZero();
1782  
                      // The mask is set only where !(nRes <= 0), so the AND clears a negative or zero value:
                      // an unsigned result cannot be below zero.
1783                  Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1784                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1785  
1786                  int fpMaxVal = op.RegisterSize == RegisterSize.Int32
1787                      ? 0x4F000000  // 2.14748365E9f (2147483648)
1788                      : 0x5F000000; // 9.223372E18f  (9223372036854775808)
1789  
1790                  Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
1791  
                      // First pass: convert the clamped value directly.
                      // NOTE(review): a value at or above the limit is expected to produce the minimum integer
                      // (0x80...0, x86 "integer indefinite"); the rest of the sequence depends on it - confirm.
1792                  Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
1793                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
1794                      : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
1795  
                      // Second pass: convert (value - limit), again clearing a result that is not above zero,
                      // so that only inputs above the limit contribute here.
1796                  nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
1797  
1798                  nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1799                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1800  
1801                  Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
1802                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes)
1803                      : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
1804  
                      // Mask is set when the remainder is still not less than the limit, i.e. the input does
                      // not fit in the unsigned result width.
1805                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1806  
                      // NOTE(review): X86Cvtsi2si is used here to obtain the compare mask as a 32-bit integer
                      // operand (0 or all ones) - confirm the intrinsic's mapping in the code generator.
1807                  Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
1808  
                      // Widen the 32-bit mask so that it matches a 64-bit result operand.
1809                  if (op.RegisterSize == RegisterSize.Int64)
1810                  {
1811                      nInt = context.SignExtend32(OperandType.I64, nInt);
1812                  }
1813  
                      // XOR inverts the second-pass result on overflow (minimum integer becomes maximum integer
                      // under the assumption above); adding the first-pass result then gives all ones. Without
                      // overflow the mask is zero and the sum of both passes is the unsigned value.
1814                  Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
1815                  dRes = context.Add(dRes, nIntOrLong);
1816  
1817                  SetIntOrZR(context, op.Rd, dRes);
1818              }
1819              else /* if (op.Size == 1) */
1820              {
                      // Double-precision variant of the sequence above (sd intrinsics).
                      // A NaN input is cleared to +0.0 (the value is ordered with itself only when not NaN).
1821                  Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
1822                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
1823  
1824                  if (isFixed)
1825                  {
1826                      // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
                          // 0x3FF0000000000000 is the bit pattern of 1.0d and 0x10000000000000 is 1 << 52, so each
                          // unit of FBits bumps the exponent field by one.
1827                      long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
1828  
1829                      Operand fpScaledMask = X86GetScalar(context, fpScaled);
1830  
1831                      nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
1832                  }
1833  
                      // Roundsd handles every mode except ToNearestAway, which uses the dedicated helper.
1834                  if (roundMode != FPRoundingMode.ToNearestAway)
1835                  {
1836                      nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
1837                  }
1838                  else
1839                  {
1840                      nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
1841                  }
1842  
1843                  Operand zero = context.VectorZero();
1844  
                      // Clear a negative or zero value (mask is set only where !(nRes <= 0)).
1845                  Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1846                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1847  
1848                  long fpMaxVal = op.RegisterSize == RegisterSize.Int32
1849                      ? 0x41E0000000000000L  // 2147483648.0000000d    (2147483648)
1850                      : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
1851  
1852                  Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
1853  
                      // First pass: convert the clamped value directly (same out-of-range assumption as the
                      // single-precision path: the minimum integer is expected on overflow - confirm).
1854                  Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
1855                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
1856                      : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
1857  
                      // Second pass on (value - limit), keeping only a remainder that is still above zero.
1858                  nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
1859  
1860                  nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
1861                  nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
1862  
1863                  Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
1864                      ? context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes)
1865                      : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
1866  
                      // Mask is set when the remainder is still not less than the limit (unsigned overflow).
1867                  nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
1868  
                      // NOTE(review): X86Cvtsi2si is used here to obtain the compare mask as a 64-bit integer
                      // operand (0 or all ones) - confirm the intrinsic's mapping in the code generator.
1869                  Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
1870  
                      // Narrow the 64-bit mask so that it matches a 32-bit result operand.
1871                  if (op.RegisterSize == RegisterSize.Int32)
1872                  {
1873                      nLong = context.ConvertI64ToI32(nLong);
1874                  }
1875  
                      // Invert the second-pass result on overflow, then add both passes together.
1876                  Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
1877                  dRes = context.Add(dRes, nIntOrLong);
1878  
1879                  SetIntOrZR(context, op.Rd, dRes);
1880              }
1881          }
1882  
1883          private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
1884          {
1885              // Extracts element `index` of vector register `reg`: as I64 when size == 3, as I32 for any other size.
1886              bool is64Bit = size == 3;
1887              return context.VectorExtract(is64Bit ? OperandType.I64 : OperandType.I32, GetVec(reg), index);
1888          }
1889      }
1890  }