InstEmitSimdCvt32.cs
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Reflection;

using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    static partial class InstEmit32
    {
        // Re-encodes a Vd register index by rotating one bit between the top and
        // bottom positions. AArch32 encodes double/single precision register
        // indices with the "extra" bit in different positions, so conversions
        // between the two need this swap.
        private static int FlipVdBits(int vd, bool lowBit)
        {
            if (lowBit)
            {
                // Move the low bit to the top.
                return ((vd & 0x1) << 4) | (vd >> 1);
            }
            else
            {
                // Move the high bit to the bottom.
                return ((vd & 0xf) << 1) | (vd >> 4);
            }
        }

        // Emits a call to the SoftFallback saturating float-to-int32 conversion
        // matching the operand's precision (FP32/FP64) and the requested signedness.
        // Used on the slow path where no host intrinsic handles saturation for us.
        private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned)
        {
            MethodInfo info;

            if (op1.Type == OperandType.FP64)
            {
                info = unsigned
                    ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))
                    : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
            }
            else
            {
                info = unsigned
                    ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
                    : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));
            }

            return context.Call(info, op1);
        }

        // VCVT (vector): converts between floating-point and integer element types.
        // Opc bit 0 selects unsigned, bit 1 selects the float-to-integer direction.
        public static void Vcvt_V(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            bool unsigned = (op.Opc & 1) != 0;
            bool toInteger = (op.Opc & 2) != 0;
            OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;

            if (toInteger)
            {
                if (Optimizations.UseAdvSimd)
                {
                    // Native AArch64 round-towards-zero vector conversion.
                    InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
                }
                else if (Optimizations.UseSse41)
                {
                    EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    // Slow path: per-element managed saturating conversion.
                    EmitVectorUnaryOpF32(context, (op1) =>
                    {
                        return EmitSaturateFloatToInt(context, op1, unsigned);
                    });
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (n) =>
                    {
                        if (unsigned)
                        {
                            // CVTDQ2PS is a signed conversion, so convert the high and
                            // low 16-bit halves of each element separately:
                            // result = float(hi16) * 65536.0f + float(lo16).
                            // 0x47800000 is the bit pattern of 65536.0f.
                            Operand mask = X86GetAllElements(context, 0x47800000);

                            Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
                            res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
                            res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);

                            Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
                            res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);

                            return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
                        }
                        else
                        {
                            return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
                        }
                    });
                }
                else
                {
                    if (unsigned)
                    {
                        EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
                    }
                    else
                    {
                        EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
                    }
                }
            }
        }

        // VCVT (vector, fixed-point): converts between F32 and fixed-point S32/U32
        // with an implied binary point of op.Fbits fractional bits.
        public static void Vcvt_V_Fixed(ArmEmitterContext context)
        {
            OpCode32SimdCvtFFixed op = (OpCode32SimdCvtFFixed)context.CurrOp;

            var toFixed = op.Opc == 1;
            int fracBits = op.Fbits;
            var unsigned = op.U;

            if (toFixed) // F32 to S32 or U32 (fixed)
            {
                EmitVectorUnaryOpF32(context, (op1) =>
                {
                    // Scale by 2^fracBits, then saturate-convert to integer.
                    var scaledValue = context.Multiply(op1, ConstF(MathF.Pow(2f, fracBits)));
                    MethodInfo info = unsigned ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)) : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32));

                    return context.Call(info, scaledValue);
                });
            }
            else // S32 or U32 (fixed) to F32
            {
                EmitVectorUnaryOpI32(context, (op1) =>
                {
                    var floatValue = unsigned ? context.ConvertToFPUI(OperandType.FP32, op1) : context.ConvertToFP(OperandType.FP32, op1);

                    // Divide back out the fixed-point scale.
                    return context.Multiply(floatValue, ConstF(1f / MathF.Pow(2f, fracBits)));
                }, !unsigned);
            }
        }

        // VCVT (between double-precision and single-precision, scalar).
        // Size == 3 selects the double-to-single direction; the Vd index bit
        // layout differs between the two register banks (see FlipVdBits).
        public static void Vcvt_FD(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            int vm = op.Vm;
            int vd;
            if (op.Size == 3)
            {
                vd = FlipVdBits(op.Vd, false);
                // Double to single.
                Operand fp = ExtractScalar(context, OperandType.FP64, vm);

                Operand res = context.ConvertToFP(OperandType.FP32, fp);

                InsertScalar(context, vd, res);
            }
            else
            {
                vd = FlipVdBits(op.Vd, true);
                // Single to double.
                Operand fp = ExtractScalar(context, OperandType.FP32, vm);

                Operand res = context.ConvertToFP(OperandType.FP64, fp);

                InsertScalar(context, vd, res);
            }
        }

        // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
        public static void Vcvt_FI(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool toInteger = (op.Opc2 & 0b100) != 0;

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            if (toInteger)
            {
                bool unsigned = (op.Opc2 & 1) == 0;
                // Opc != 1 means the rounding mode comes from the FPSCR rather than
                // being the fixed round-towards-zero of the VCVT encoding.
                bool roundWithFpscr = op.Opc != 1;

                if (!roundWithFpscr && Optimizations.UseAdvSimd)
                {
                    bool doubleSize = floatSize == OperandType.FP64;

                    if (doubleSize)
                    {
                        Operand m = GetVecA32(op.Vm >> 1);

                        Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, true);

                        Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;

                        Operand asInteger = context.AddIntrinsicInt(inst, toConvert);

                        InsertScalar(context, op.Vd, asInteger);
                    }
                    else
                    {
                        InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS, false);
                    }
                }
                else if (!roundWithFpscr && Optimizations.UseSse41)
                {
                    EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
                }
                else
                {
                    Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                    // TODO: Fast Path.
                    if (roundWithFpscr)
                    {
                        toConvert = EmitRoundByRMode(context, toConvert);
                    }

                    // Round towards zero.
                    Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);

                    InsertScalar(context, op.Vd, asInteger);
                }
            }
            else
            {
                bool unsigned = op.Opc == 0;

                Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm);

                Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned);

                InsertScalar(context, op.Vd, asFloat);
            }
        }

        // Emits a call to Math.Round/MathF.Round with an explicit MidpointRounding
        // mode, choosing the single- or double-precision overload from op.Size.
        private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
        {
            IOpCode32Simd op = (IOpCode32Simd)context.CurrOp;

            string name = nameof(Math.Round);

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
                : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });

            return context.Call(info, n, Const((int)roundMode));
        }

        // Maps the 2-bit RM field of VCVTA/N/P/M and VRINTA/N/P/M to a rounding mode.
        private static FPRoundingMode RMToRoundMode(int rm)
        {
            return rm switch
            {
                0b00 => FPRoundingMode.ToNearestAway,
                0b01 => FPRoundingMode.ToNearest,
                0b10 => FPRoundingMode.TowardsPlusInfinity,
                0b11 => FPRoundingMode.TowardsMinusInfinity,
                _ => throw new ArgumentOutOfRangeException(nameof(rm)),
            };
        }

        // VCVTA/M/N/P (floating-point).
        public static void Vcvt_RM(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            bool unsigned = op.Opc == 0;
            int rm = op.Opc2 & 3;

            Intrinsic inst;

            if (Optimizations.UseAdvSimd)
            {
                bool doubleSize = floatSize == OperandType.FP64;

                if (doubleSize)
                {
                    Operand m = GetVecA32(op.Vm >> 1);

                    Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, true);

                    // Pick the AArch64 convert-to-GP-register intrinsic whose rounding
                    // behavior matches rm (A = away, N = nearest even, P = +inf, M = -inf).
                    if (unsigned)
                    {
                        inst = rm switch
                        {
                            0b00 => Intrinsic.Arm64FcvtauGp,
                            0b01 => Intrinsic.Arm64FcvtnuGp,
                            0b10 => Intrinsic.Arm64FcvtpuGp,
                            0b11 => Intrinsic.Arm64FcvtmuGp,
                            _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
                        };
                    }
                    else
                    {
                        inst = rm switch
                        {
                            0b00 => Intrinsic.Arm64FcvtasGp,
                            0b01 => Intrinsic.Arm64FcvtnsGp,
                            0b10 => Intrinsic.Arm64FcvtpsGp,
                            0b11 => Intrinsic.Arm64FcvtmsGp,
                            _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
                        };
                    }

                    Operand asInteger = context.AddIntrinsicInt(inst | Intrinsic.Arm64VDouble, toConvert);

                    InsertScalar(context, op.Vd, asInteger);
                }
                else
                {
                    if (unsigned)
                    {
                        inst = rm switch
                        {
                            0b00 => Intrinsic.Arm64FcvtauS,
                            0b01 => Intrinsic.Arm64FcvtnuS,
                            0b10 => Intrinsic.Arm64FcvtpuS,
                            0b11 => Intrinsic.Arm64FcvtmuS,
                            _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
                        };
                    }
                    else
                    {
                        inst = rm switch
                        {
                            0b00 => Intrinsic.Arm64FcvtasS,
                            0b01 => Intrinsic.Arm64FcvtnsS,
                            0b10 => Intrinsic.Arm64FcvtpsS,
                            0b11 => Intrinsic.Arm64FcvtmsS,
                            _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
                        };
                    }

                    InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
                }
            }
            else if (Optimizations.UseSse41)
            {
                EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
            }
            else
            {
                Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                // Round explicitly first, then use the round-towards-zero saturating
                // conversion (the value is already integral at that point).
                switch (rm)
                {
                    case 0b00: // Away
                        toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
                        break;
                    case 0b01: // Nearest
                        toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
                        break;
                    case 0b10: // Towards positive infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
                        break;
                    case 0b11: // Towards negative infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
                        break;
                }

                Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);

                InsertScalar(context, op.Vd, asInteger);
            }
        }

        // VCVTT/VCVTB: converts between half-precision (top or bottom half of a
        // single register, selected by op.T) and single/double precision.
        public static void Vcvt_TB(ArmEmitterContext context)
        {
            OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp;

            if (Optimizations.UseF16c)
            {
                Debug.Assert(!Optimizations.ForceLegacySse);

                if (op.Op)
                {
                    // To half: narrow FP64 to FP32 first if needed, then use VCVTPS2PH.
                    Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);
                    if (op.Size == 1)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res);
                    }
                    res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
                    res = context.VectorExtract16(res, 0);
                    InsertScalar16(context, op.Vd, op.T, res);
                }
                else
                {
                    // From half: widen with VCVTPH2PS, then to FP64 if needed.
                    Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T));
                    res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
                    if (op.Size == 1)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
                    }
                    res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0);
                    InsertScalar(context, op.Vd, res);
                }
            }
            else
            {
                if (op.Op)
                {
                    // Convert to half.

                    Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm);

                    MethodInfo method = op.Size == 1
                        ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
                        : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));

                    // The soft-float helpers run outside the guest FP mode, and may
                    // touch guest context, so save/restore around the call.
                    context.ExitArmFpMode();
                    context.StoreToContext();
                    Operand res = context.Call(method, src);
                    context.LoadFromContext();
                    context.EnterArmFpMode();

                    InsertScalar16(context, op.Vd, op.T, res);
                }
                else
                {
                    // Convert from half.

                    Operand src = ExtractScalar16(context, op.Vm, op.T);

                    MethodInfo method = op.Size == 1
                        ? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
                        : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));

                    context.ExitArmFpMode();
                    context.StoreToContext();
                    Operand res = context.Call(method, src);
                    context.LoadFromContext();
                    context.EnterArmFpMode();

                    InsertScalar(context, op.Vd, res);
                }
            }
        }

        // VRINTA/M/N/P (floating-point).
        // VRINTA/M/N/P (scalar): rounds to integral in floating-point format,
        // with the rounding mode taken from the rm field (not the FPSCR).
        public static void Vrint_RM(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

            int rm = op.Opc2 & 3;

            if (Optimizations.UseAdvSimd)
            {
                Intrinsic inst = rm switch
                {
                    0b00 => Intrinsic.Arm64FrintaS,
                    0b01 => Intrinsic.Arm64FrintnS,
                    0b10 => Intrinsic.Arm64FrintpS,
                    0b11 => Intrinsic.Arm64FrintmS,
                    _ => throw new InvalidOperationException($"{nameof(rm)} contains an invalid value: {rm}"),
                };

                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
            }
            else if (Optimizations.UseSse41)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    FPRoundingMode roundMode = RMToRoundMode(rm);

                    if (roundMode != FPRoundingMode.ToNearestAway)
                    {
                        // ROUNDSS/ROUNDSD covers every mode except ties-away-from-zero.
                        Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
                        return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
                    }
                    else
                    {
                        return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true);
                    }
                });
            }
            else
            {
                Operand toConvert = ExtractScalar(context, floatSize, op.Vm);

                switch (rm)
                {
                    case 0b00: // Away
                        toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
                        break;
                    case 0b01: // Nearest
                        toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
                        break;
                    case 0b10: // Towards positive infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert);
                        break;
                    case 0b11: // Towards negative infinity
                        toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert);
                        break;
                }

                InsertScalar(context, op.Vd, toConvert);
            }
        }

        // VRINTA (vector).
        // VRINTA (vector): round to integral, ties away from zero.
        public static void Vrinta_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
            }
        }

        // VRINTM (vector): round to integral, towards minus infinity.
        public static void Vrintm_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
            }
        }

        // VRINTN (vector): round to integral, ties to nearest even.
        public static void Vrintn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
            }
        }

        // VRINTP (vector).
        // VRINTP (vector): round to integral, towards plus infinity.
        public static void Vrintp_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (m) =>
                {
                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
                });
            }
            else
            {
                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
            }
        }

        // VRINTR (floating-point): round to integral using the FPSCR rounding mode.
        public static void Vrintr_S(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintiS);
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) =>
                {
                    return EmitRoundByRMode(context, op1);
                });
            }
        }

        // VRINTZ (floating-point): round to integral, towards zero (truncate).
        public static void Vrint_Z(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
            }
            else if (Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd;
                    return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
                });
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1));
            }
        }

        // VRINTX (floating-point).
        // VRINTX (floating-point): round to integral using the FPSCR rounding mode.
        // NOTE(review): same emission as Vrintr_S here; VRINTX additionally raises
        // Inexact on the real hardware — presumably not modeled. Confirm intent.
        public static void Vrintx_S(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) =>
                {
                    return EmitRoundByRMode(context, op1);
                });
            }
        }

        // Converts an I32/I64 integer operand to the given floating-point type,
        // signed or unsigned.
        private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
        {
            Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);

            if (signed)
            {
                return context.ConvertToFP(type, value);
            }
            else
            {
                return context.ConvertToFPUI(type, value);
            }
        }

        // SSE4.1 scalar float-to-int32 conversion with ARM saturation semantics:
        // NaN maps to 0, and out-of-range values saturate instead of producing
        // the x86 "integer indefinite" result.
        private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            // A port of the similar round function in InstEmitSimdCvt.
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;
            int shift = doubleSize ? 1 : 2;
            Operand n = GetVecA32(op.Vm >> shift);
            n = EmitSwapScalar(context, n, op.Vm, doubleSize);

            if (!doubleSize)
            {
                // CMPSS(n, n, OrderedQ) yields all-ones for non-NaN lanes; ANDing
                // with n zeroes out NaN inputs.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                if (roundMode != FPRoundingMode.ToNearestAway)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
                }
                else
                {
                    nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
                }

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = default;

                if (!signed)
                {
                    // Clamp negative inputs to zero for the unsigned conversion.
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);

                if (!signed)
                {
                    // Second conversion of (value - 2^31) covers the upper half of the
                    // unsigned range that CVTSS2SI cannot represent directly.
                    nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
                }

                // All-ones mask where the (possibly rebased) value still overflows;
                // XORing with it flips the x86 indefinite result into the saturated one.
                nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
            else
            {
                // Double-precision variant of the sequence above.
                Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
                nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                if (roundMode != FPRoundingMode.ToNearestAway)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
                }
                else
                {
                    nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true);
                }

                Operand zero = context.VectorZero();

                Operand nCmp;
                Operand nIntOrLong2 = default;

                if (!signed)
                {
                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                }

                long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)

                Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

                Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);

                if (!signed)
                {
                    nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);

                    nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                    nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
                }

                nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
                nLong = context.ConvertI64ToI32(nLong);

                Operand dRes;
                if (signed)
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
                }
                else
                {
                    dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
                    dRes = context.Add(dRes, nIntOrLong);
                }

                InsertScalar(context, op.Vd, dRes);
            }
        }

        // SSE4.1 vector float-to-int conversion with ARM saturation semantics;
        // vector counterpart of EmitSse41ConvertInt32 (NaN -> 0, overflow saturates).
        private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            EmitVectorUnaryOpSimd32(context, (n) =>
            {
                int sizeF = op.Size & 1;

                if (sizeF == 0)
                {
                    // Zero out NaN lanes (CMPPS OrderedQ gives all-ones for non-NaN).
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        // Clamp negative lanes to zero for unsigned conversion.
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

                    Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    Operand nInt2 = default;

                    if (!signed)
                    {
                        // Rebase by 2^31 to reach the upper half of the unsigned range.
                        nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
                    }

                    // Overflow mask: XORing it with the converted lanes turns the x86
                    // indefinite result into the ARM saturated value.
                    nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
                    }
                }
                else /* if (sizeF == 1) */
                {
                    // Double-precision lanes; same structure with 64-bit results.
                    Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
                    nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

                    nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));

                    Operand zero = context.VectorZero();
                    Operand nCmp;
                    if (!signed)
                    {
                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
                    }

                    Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

                    Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    Operand nLong2 = default;

                    if (!signed)
                    {
                        nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);

                        nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
                        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

                        nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
                    }

                    nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

                    if (signed)
                    {
                        return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
                    }
                    else
                    {
                        Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
                        return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
                    }
                }
            });
        }
    }
}