InstEmitSimdArithmetic.cs
1 // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h 2 // https://www.agner.org/optimize/#vectorclass @ vectori128.h 3 4 using ARMeilleure.Decoders; 5 using ARMeilleure.IntermediateRepresentation; 6 using ARMeilleure.State; 7 using ARMeilleure.Translation; 8 using System; 9 using System.Diagnostics; 10 using static ARMeilleure.Instructions.InstEmitHelper; 11 using static ARMeilleure.Instructions.InstEmitSimdHelper; 12 using static ARMeilleure.Instructions.InstEmitSimdHelper32; 13 using static ARMeilleure.IntermediateRepresentation.Operand.Factory; 14 15 namespace ARMeilleure.Instructions 16 { 17 using Func2I = Func<Operand, Operand, Operand>; 18 19 static partial class InstEmit 20 { 21 public static void Abs_S(ArmEmitterContext context) 22 { 23 if (Optimizations.UseAdvSimd) 24 { 25 InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS); 26 } 27 else 28 { 29 EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); 30 } 31 } 32 33 public static void Abs_V(ArmEmitterContext context) 34 { 35 if (Optimizations.UseAdvSimd) 36 { 37 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV); 38 } 39 else 40 { 41 EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); 42 } 43 } 44 45 public static void Add_S(ArmEmitterContext context) 46 { 47 if (Optimizations.UseAdvSimd) 48 { 49 InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS); 50 } 51 else 52 { 53 EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); 54 } 55 } 56 57 public static void Add_V(ArmEmitterContext context) 58 { 59 if (Optimizations.UseAdvSimd) 60 { 61 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV); 62 } 63 else if (Optimizations.UseSse2) 64 { 65 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 66 67 Operand n = GetVec(op.Rn); 68 Operand m = GetVec(op.Rm); 69 70 Intrinsic addInst = X86PaddInstruction[op.Size]; 71 72 Operand res = context.AddIntrinsic(addInst, n, m); 73 74 if (op.RegisterSize == RegisterSize.Simd64) 75 { 76 res = context.VectorZeroUpper64(res); 77 } 78 79 context.Copy(GetVec(op.Rd), res); 80 } 81 else 82 { 83 EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); 84 } 85 } 86 87 public static void Addhn_V(ArmEmitterContext context) 88 { 89 if (Optimizations.UseAdvSimd) 90 { 91 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV); 92 } 93 else 94 { 95 EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); 96 } 97 } 98 99 public static void Addp_S(ArmEmitterContext context) 100 { 101 if (Optimizations.UseAdvSimd) 102 { 103 InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS); 104 } 105 else 106 { 107 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 108 109 Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); 110 Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); 111 112 Operand res = context.Add(ne0, ne1); 113 114 context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); 115 } 116 } 117 118 public static void Addp_V(ArmEmitterContext context) 119 { 120 if (Optimizations.UseAdvSimd) 121 { 122 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV); 123 } 124 else if (Optimizations.UseSsse3) 125 { 126 EmitSsse3VectorPairwiseOp(context, X86PaddInstruction); 127 } 128 else 129 { 130 EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2)); 131 } 132 } 133 134 public static void Addv_V(ArmEmitterContext context) 135 { 136 if (Optimizations.UseAdvSimd) 137 { 138 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV); 139 } 140 else 141 { 142 EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); 143 } 144 } 145 146 public static void Cls_V(ArmEmitterContext context) 147 { 148 if (Optimizations.UseAdvSimd) 149 { 150 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV); 151 } 152 else 153 { 154 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 155 156 Operand res = context.VectorZero(); 157 158 int elems = op.GetBytesCount() >> op.Size; 159 160 int eSize = 8 << op.Size; 161 162 for (int index = 0; index < elems; index++) 163 { 164 Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); 165 166 Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); 167 168 res = EmitVectorInsert(context, res, de, index, op.Size); 169 } 170 171 context.Copy(GetVec(op.Rd), res); 172 } 173 } 174 175 public static void Clz_V(ArmEmitterContext context) 176 { 177 if (Optimizations.UseAdvSimd) 178 { 179 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV); 180 } 181 else 182 { 183 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 184 185 int eSize = 8 << op.Size; 186 187 Operand res = eSize switch 188 { 189 8 => Clz_V_I8(context, GetVec(op.Rn)), 190 16 => Clz_V_I16(context, GetVec(op.Rn)), 191 32 => Clz_V_I32(context, GetVec(op.Rn)), 192 _ => default, 193 }; 194 195 if (res != default) 196 { 197 if (op.RegisterSize == RegisterSize.Simd64) 198 { 199 res = context.VectorZeroUpper64(res); 200 } 201 } 202 else 203 { 204 int elems = op.GetBytesCount() >> op.Size; 205 206 res = context.VectorZero(); 207 208 for (int index = 0; index < elems; index++) 209 { 210 Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); 211 212 Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); 213 214 res = EmitVectorInsert(context, res, de, index, op.Size); 215 } 216 } 217 218 context.Copy(GetVec(op.Rd), res); 219 } 220 } 221 222 private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg) 223 { 224 if (!Optimizations.UseSsse3) 225 { 226 return default; 227 } 228 229 // CLZ nibble table. 230 Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04); 231 232 Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f); 233 Operand c04 = X86GetAllElements(context, 0x04_04_04_04); 234 235 // CLZ of low 4 bits of elements in arg. 236 Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg); 237 238 // Get the high 4 bits of elements in arg. 239 Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4)); 240 hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow); 241 242 // CLZ of high 4 bits of elements in arg. 243 Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg); 244 245 // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits. 246 Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04); 247 loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); 248 249 return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz); 250 } 251 252 private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg) 253 { 254 if (!Optimizations.UseSsse3) 255 { 256 return default; 257 } 258 259 Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01); 260 Operand maskLow = X86GetAllElements(context, 0x00ff_00ff); 261 Operand c0008 = X86GetAllElements(context, 0x0008_0008); 262 263 // CLZ pair of high 8 and low 8 bits of elements in arg. 264 Operand hiloClz = Clz_V_I8(context, arg); 265 // Get CLZ of low 8 bits in each pair. 266 Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow); 267 // Get CLZ of high 8 bits in each pair. 268 Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap); 269 270 // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits. 271 Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008); 272 loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); 273 274 return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz); 275 } 276 277 private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg) 278 { 279 // TODO: Use vplzcntd when AVX-512 is supported. 280 if (!Optimizations.UseSse2) 281 { 282 return default; 283 } 284 285 #pragma warning disable IDE0055 // Disable formatting 286 Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1); 287 Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1); 288 Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8)); 289 Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1); 290 Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1); 291 Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne()); 292 #pragma warning restore IDE0055 293 294 Operand c55555555 = X86GetAllElements(context, 0x55555555); 295 Operand c33333333 = X86GetAllElements(context, 0x33333333); 296 Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f); 297 Operand c0000003f = X86GetAllElements(context, 0x0000003f); 298 299 Operand tmp0; 300 Operand tmp1; 301 Operand res; 302 303 // Set all bits after highest set bit to 1. 304 res = OrVector(ShiftRightVectorUI32(arg, 1), arg); 305 res = OrVector(ShiftRightVectorUI32(res, 2), res); 306 res = OrVector(ShiftRightVectorUI32(res, 4), res); 307 res = OrVector(ShiftRightVectorUI32(res, 8), res); 308 res = OrVector(ShiftRightVectorUI32(res, 16), res); 309 310 // Make leading 0s into leading 1s. 311 res = NotVector(res); 312 313 // Count leading 1s, which is the population count. 314 tmp0 = ShiftRightVectorUI32(res, 1); 315 tmp0 = AndVector(tmp0, c55555555); 316 res = SubVectorI32(res, tmp0); 317 318 tmp0 = ShiftRightVectorUI32(res, 2); 319 tmp0 = AndVector(tmp0, c33333333); 320 tmp1 = AndVector(res, c33333333); 321 res = AddVectorI32(tmp0, tmp1); 322 323 tmp0 = ShiftRightVectorUI32(res, 4); 324 tmp0 = AddVectorI32(tmp0, res); 325 res = AndVector(tmp0, c0f0f0f0f); 326 327 tmp0 = ShiftRightVectorUI32(res, 8); 328 res = AddVectorI32(tmp0, res); 329 330 tmp0 = ShiftRightVectorUI32(res, 16); 331 res = AddVectorI32(tmp0, res); 332 333 res = AndVector(res, c0000003f); 334 335 return res; 336 } 337 338 public static void Cnt_V(ArmEmitterContext context) 339 { 340 if (Optimizations.UseAdvSimd) 341 { 342 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV); 343 } 344 else 345 { 346 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 347 348 Operand res = context.VectorZero(); 349 350 int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8; 351 352 for (int index = 0; index < elems; index++) 353 { 354 Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); 355 356 Operand de; 357 358 if (Optimizations.UsePopCnt) 359 { 360 de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); 361 } 362 else 363 { 364 de = EmitCountSetBits8(context, ne); 365 } 366 367 res = EmitVectorInsert(context, res, de, index, 0); 368 } 369 370 context.Copy(GetVec(op.Rd), res); 371 } 372 } 373 374 public static void Fabd_S(ArmEmitterContext context) 375 { 376 if (Optimizations.UseAdvSimd) 377 { 378 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS); 379 } 380 else if (Optimizations.FastFP && Optimizations.UseSse2) 381 { 382 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 383 384 int sizeF = op.Size & 1; 385 386 if (sizeF == 0) 387 { 388 Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm)); 389 390 res = EmitFloatAbs(context, res, true, false); 391 392 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 393 } 394 else /* if (sizeF == 1) */ 395 { 396 Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm)); 397 398 res = EmitFloatAbs(context, res, false, false); 399 400 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 401 } 402 } 403 else 404 { 405 EmitScalarBinaryOpF(context, (op1, op2) => 406 { 407 Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); 408 409 return EmitUnaryMathCall(context, nameof(Math.Abs), res); 410 }); 411 } 412 } 413 414 public static void Fabd_V(ArmEmitterContext context) 415 { 416 if (Optimizations.UseAdvSimd) 417 { 418 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV); 419 } 420 else if (Optimizations.FastFP && Optimizations.UseSse2) 421 { 422 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 423 424 int sizeF = op.Size & 1; 425 426 if (sizeF == 0) 427 { 428 Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm)); 429 430 res = EmitFloatAbs(context, res, true, true); 431 432 if (op.RegisterSize == RegisterSize.Simd64) 433 { 434 res = context.VectorZeroUpper64(res); 435 } 436 437 context.Copy(GetVec(op.Rd), res); 438 } 439 else /* if (sizeF == 1) */ 440 { 441 Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm)); 442 443 res = EmitFloatAbs(context, res, false, true); 444 445 context.Copy(GetVec(op.Rd), res); 446 } 447 } 448 else 449 { 450 EmitVectorBinaryOpF(context, (op1, op2) => 451 { 452 Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); 453 454 return EmitUnaryMathCall(context, nameof(Math.Abs), res); 455 }); 456 } 457 } 458 459 public static void Fabs_S(ArmEmitterContext context) 460 { 461 if (Optimizations.UseAdvSimd) 462 { 463 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS); 464 } 465 else if (Optimizations.UseSse2) 466 { 467 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 468 469 if (op.Size == 0) 470 { 471 Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false); 472 473 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 474 } 475 else /* if (op.Size == 1) */ 476 { 477 Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false); 478 479 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 480 } 481 } 482 else 483 { 484 EmitScalarUnaryOpF(context, (op1) => 485 { 486 return EmitUnaryMathCall(context, nameof(Math.Abs), op1); 487 }); 488 } 489 } 490 491 public static void Fabs_V(ArmEmitterContext context) 492 { 493 if (Optimizations.UseAdvSimd) 494 { 495 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV); 496 } 497 else if (Optimizations.UseSse2) 498 { 499 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 500 501 int sizeF = op.Size & 1; 502 503 if (sizeF == 0) 504 { 505 Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, true); 506 507 if (op.RegisterSize == RegisterSize.Simd64) 508 { 509 res = context.VectorZeroUpper64(res); 510 } 511 512 context.Copy(GetVec(op.Rd), res); 513 } 514 else /* if (sizeF == 1) */ 515 { 516 Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true); 517 518 context.Copy(GetVec(op.Rd), res); 519 } 520 } 521 else 522 { 523 EmitVectorUnaryOpF(context, (op1) => 524 { 525 return EmitUnaryMathCall(context, nameof(Math.Abs), op1); 526 }); 527 } 528 } 529 530 public static void Fadd_S(ArmEmitterContext context) 531 { 532 if (Optimizations.UseAdvSimd) 533 { 534 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS); 535 } 536 else if (Optimizations.FastFP && Optimizations.UseSse2) 537 { 538 EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); 539 } 540 else if (Optimizations.FastFP) 541 { 542 EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); 543 } 544 else 545 { 546 EmitScalarBinaryOpF(context, (op1, op2) => 547 { 548 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); 549 }); 550 } 551 } 552 553 public static void Fadd_V(ArmEmitterContext context) 554 { 555 if (Optimizations.UseAdvSimd) 556 { 557 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV); 558 } 559 else if (Optimizations.FastFP && Optimizations.UseSse2) 560 { 561 EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); 562 } 563 else if (Optimizations.FastFP) 564 { 565 EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); 566 } 567 else 568 { 569 EmitVectorBinaryOpF(context, (op1, op2) => 570 { 571 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); 572 }); 573 } 574 } 575 576 public static void Faddp_S(ArmEmitterContext context) 577 { 578 if (Optimizations.UseAdvSimd) 579 { 580 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS); 581 } 582 else if (Optimizations.FastFP && Optimizations.UseSse3) 583 { 584 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 585 586 if ((op.Size & 1) == 0) 587 { 588 Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn)); 589 590 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 591 } 592 else /* if ((op.Size & 1) == 1) */ 593 { 594 Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn)); 595 596 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 597 } 598 } 599 else 600 { 601 EmitScalarPairwiseOpF(context, (op1, op2) => 602 { 603 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); 604 }); 605 } 606 } 607 608 public static void Faddp_V(ArmEmitterContext context) 609 { 610 if (Optimizations.UseAdvSimd) 611 { 612 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV); 613 } 614 else if (Optimizations.FastFP && Optimizations.UseSse41) 615 { 616 EmitSse2VectorPairwiseOpF(context, (op1, op2) => 617 { 618 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 619 { 620 IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; 621 622 Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd; 623 624 return context.AddIntrinsic(addInst, op1, op2); 625 }, scalar: false, op1, op2); 626 }); 627 } 628 else 629 { 630 EmitVectorPairwiseOpF(context, (op1, op2) => 631 { 632 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); 633 }); 634 } 635 } 636 637 public static void Fdiv_S(ArmEmitterContext context) 638 { 639 if (Optimizations.UseAdvSimd) 640 { 641 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS); 642 } 643 else if (Optimizations.FastFP && Optimizations.UseSse2) 644 { 645 EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); 646 } 647 else if (Optimizations.FastFP) 648 { 649 EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); 650 } 651 else 652 { 653 EmitScalarBinaryOpF(context, (op1, op2) => 654 { 655 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); 656 }); 657 } 658 } 659 660 public static void Fdiv_V(ArmEmitterContext context) 661 { 662 if (Optimizations.UseAdvSimd) 663 { 664 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV); 665 } 666 else if (Optimizations.FastFP && Optimizations.UseSse2) 667 { 668 EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); 669 } 670 else if (Optimizations.FastFP) 671 { 672 EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); 673 } 674 else 675 { 676 EmitVectorBinaryOpF(context, (op1, op2) => 677 { 678 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); 679 }); 680 } 681 } 682 683 public static void Fmadd_S(ArmEmitterContext context) // Fused. 684 { 685 if (Optimizations.UseAdvSimd) 686 { 687 InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS); 688 } 689 else if (Optimizations.FastFP && Optimizations.UseSse2) 690 { 691 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 692 693 Operand d = GetVec(op.Rd); 694 Operand a = GetVec(op.Ra); 695 Operand n = GetVec(op.Rn); 696 Operand m = GetVec(op.Rm); 697 698 Operand res; 699 700 if (op.Size == 0) 701 { 702 if (Optimizations.UseFma) 703 { 704 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m); 705 } 706 else 707 { 708 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 709 res = context.AddIntrinsic(Intrinsic.X86Addss, a, res); 710 } 711 712 context.Copy(d, context.VectorZeroUpper96(res)); 713 } 714 else /* if (op.Size == 1) */ 715 { 716 if (Optimizations.UseFma) 717 { 718 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m); 719 } 720 else 721 { 722 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 723 res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res); 724 } 725 726 context.Copy(d, context.VectorZeroUpper64(res)); 727 } 728 } 729 else 730 { 731 EmitScalarTernaryRaOpF(context, (op1, op2, op3) => 732 { 733 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); 734 }); 735 } 736 } 737 738 public static void Fmax_S(ArmEmitterContext context) 739 { 740 if (Optimizations.UseAdvSimd) 741 { 742 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS); 743 } 744 else if (Optimizations.FastFP && Optimizations.UseSse41) 745 { 746 EmitSse41ProcessNaNsOpF(context, (op1, op2) => 747 { 748 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); 749 }, scalar: true); 750 } 751 else 752 { 753 EmitScalarBinaryOpF(context, (op1, op2) => 754 { 755 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); 756 }); 757 } 758 } 759 760 public static void Fmax_V(ArmEmitterContext context) 761 { 762 if (Optimizations.UseAdvSimd) 763 { 764 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV); 765 } 766 else if (Optimizations.FastFP && Optimizations.UseSse41) 767 { 768 EmitSse41ProcessNaNsOpF(context, (op1, op2) => 769 { 770 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); 771 }, scalar: false); 772 } 773 else 774 { 775 EmitVectorBinaryOpF(context, (op1, op2) => 776 { 777 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); 778 }); 779 } 780 } 781 782 public static void Fmaxnm_S(ArmEmitterContext context) 783 { 784 if (Optimizations.UseAdvSimd) 785 { 786 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS); 787 } 788 else if (Optimizations.FastFP && Optimizations.UseSse41) 789 { 790 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true); 791 } 792 else 793 { 794 EmitScalarBinaryOpF(context, (op1, op2) => 795 { 796 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); 797 }); 798 } 799 } 800 801 public static void Fmaxnm_V(ArmEmitterContext context) 802 { 803 if (Optimizations.UseAdvSimd) 804 { 805 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV); 806 } 807 else if (Optimizations.FastFP && Optimizations.UseSse41) 808 { 809 EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false); 810 } 811 else 812 { 813 EmitVectorBinaryOpF(context, (op1, op2) => 814 { 815 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); 816 }); 817 } 818 } 819 820 public static void Fmaxnmp_S(ArmEmitterContext context) 821 { 822 if (Optimizations.UseAdvSimd) 823 { 824 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS); 825 } 826 else if (Optimizations.FastFP && Optimizations.UseSse41) 827 { 828 EmitSse2ScalarPairwiseOpF(context, (op1, op2) => 829 { 830 return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2); 831 }); 832 } 833 else 834 { 835 EmitScalarPairwiseOpF(context, (op1, op2) => 836 { 837 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); 838 }); 839 } 840 } 841 842 public static void Fmaxnmp_V(ArmEmitterContext context) 843 { 844 if (Optimizations.UseAdvSimd) 845 { 846 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV); 847 } 848 else if (Optimizations.FastFP && Optimizations.UseSse41) 849 { 850 EmitSse2VectorPairwiseOpF(context, (op1, op2) => 851 { 852 return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); 853 }); 854 } 855 else 856 { 857 EmitVectorPairwiseOpF(context, (op1, op2) => 858 { 859 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); 860 }); 861 } 862 } 863 864 public static void Fmaxnmv_V(ArmEmitterContext context) 865 { 866 if (Optimizations.UseAdvSimd) 867 { 868 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV); 869 } 870 else if (Optimizations.FastFP && Optimizations.UseSse41) 871 { 872 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => 873 { 874 return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); 875 }); 876 } 877 else 878 { 879 EmitVectorAcrossVectorOpF(context, (op1, op2) => 880 { 881 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); 882 }); 883 } 884 } 885 886 public static void Fmaxp_S(ArmEmitterContext context) 887 { 888 if (Optimizations.UseAdvSimd) 889 { 890 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS); 891 } 892 else if (Optimizations.FastFP && Optimizations.UseSse41) 893 { 894 EmitSse2ScalarPairwiseOpF(context, (op1, op2) => 895 { 896 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 897 { 898 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); 899 }, scalar: true, op1, op2); 900 }); 901 } 902 else 903 { 904 EmitScalarPairwiseOpF(context, (op1, op2) => 905 { 906 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); 907 }); 908 } 909 } 910 911 public static void Fmaxp_V(ArmEmitterContext context) 912 { 913 if (Optimizations.UseAdvSimd) 914 { 915 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV); 916 } 917 else if (Optimizations.FastFP && Optimizations.UseSse41) 918 { 919 EmitSse2VectorPairwiseOpF(context, (op1, op2) => 920 { 921 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 922 { 923 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); 924 }, scalar: false, op1, op2); 925 }); 926 } 927 else 928 { 929 EmitVectorPairwiseOpF(context, (op1, op2) => 930 { 931 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); 932 }); 933 } 934 } 935 936 public static void Fmaxv_V(ArmEmitterContext context) 937 { 938 if (Optimizations.UseAdvSimd) 939 { 940 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxvV); 941 } 942 else if (Optimizations.FastFP && Optimizations.UseSse41) 943 { 944 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => 945 { 946 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 947 { 948 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); 949 }, scalar: false, op1, op2); 950 }); 951 } 952 else 953 { 954 EmitVectorAcrossVectorOpF(context, (op1, op2) => 955 { 956 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); 957 }); 958 } 959 } 960 961 public static void Fmin_S(ArmEmitterContext context) 962 { 963 if (Optimizations.UseAdvSimd) 964 { 965 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS); 966 } 967 else if (Optimizations.FastFP && Optimizations.UseSse41) 968 { 969 EmitSse41ProcessNaNsOpF(context, (op1, op2) => 970 { 971 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); 972 }, scalar: true); 973 } 974 else 975 { 976 EmitScalarBinaryOpF(context, (op1, op2) => 977 { 978 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); 979 }); 980 } 981 } 982 983 public static void Fmin_V(ArmEmitterContext context) 984 { 985 if (Optimizations.UseAdvSimd) 986 { 987 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV); 988 } 989 else if (Optimizations.FastFP && Optimizations.UseSse41) 990 { 991 EmitSse41ProcessNaNsOpF(context, (op1, op2) => 992 { 993 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); 994 }, scalar: false); 995 } 996 else 997 { 998 EmitVectorBinaryOpF(context, (op1, op2) => 999 { 1000 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); 1001 }); 1002 } 1003 } 1004 1005 public static void Fminnm_S(ArmEmitterContext context) 1006 { 1007 if (Optimizations.UseAdvSimd) 1008 { 1009 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS); 1010 } 1011 else if (Optimizations.FastFP && Optimizations.UseSse41) 1012 { 1013 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true); 1014 } 1015 else 1016 { 1017 EmitScalarBinaryOpF(context, (op1, op2) => 1018 { 1019 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 1020 }); 1021 } 1022 } 1023 1024 public static void Fminnm_V(ArmEmitterContext context) 1025 { 1026 if (Optimizations.UseAdvSimd) 1027 { 1028 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV); 1029 } 1030 else if (Optimizations.FastFP && Optimizations.UseSse41) 1031 { 1032 EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false); 1033 } 1034 else 1035 { 1036 EmitVectorBinaryOpF(context, (op1, op2) => 1037 { 1038 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 1039 }); 1040 } 1041 } 1042 1043 public static void Fminnmp_S(ArmEmitterContext context) 1044 { 1045 if (Optimizations.UseAdvSimd) 1046 { 1047 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS); 1048 } 1049 else if (Optimizations.FastFP && Optimizations.UseSse41) 1050 { 1051 EmitSse2ScalarPairwiseOpF(context, (op1, op2) => 1052 { 1053 return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2); 1054 }); 1055 } 1056 else 1057 { 1058 EmitScalarPairwiseOpF(context, (op1, op2) => 1059 { 1060 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 1061 }); 1062 } 1063 } 1064 1065 public static void Fminnmp_V(ArmEmitterContext context) 1066 { 1067 if (Optimizations.UseAdvSimd) 1068 { 1069 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV); 1070 } 1071 else if (Optimizations.FastFP && Optimizations.UseSse41) 1072 { 1073 EmitSse2VectorPairwiseOpF(context, (op1, op2) => 1074 { 1075 return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); 1076 }); 1077 } 1078 else 1079 { 1080 EmitVectorPairwiseOpF(context, (op1, op2) => 1081 { 1082 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 1083 }); 1084 } 1085 } 1086 1087 public static void Fminnmv_V(ArmEmitterContext context) 1088 { 1089 if (Optimizations.UseAdvSimd) 1090 { 1091 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV); 1092 } 1093 else if (Optimizations.FastFP && Optimizations.UseSse41) 1094 { 1095 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => 1096 { 1097 return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); 1098 }); 1099 } 1100 else 1101 { 1102 EmitVectorAcrossVectorOpF(context, (op1, op2) => 1103 { 1104 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 1105 }); 1106 } 1107 } 1108 1109 public static void Fminp_S(ArmEmitterContext context) 1110 { 1111 if (Optimizations.UseAdvSimd) 1112 { 1113 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS); 1114 } 1115 else if (Optimizations.FastFP && Optimizations.UseSse41) 1116 { 1117 EmitSse2ScalarPairwiseOpF(context, (op1, op2) => 1118 { 1119 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 1120 { 1121 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); 1122 }, scalar: true, op1, op2); 1123 }); 1124 } 1125 else 1126 { 1127 EmitScalarPairwiseOpF(context, (op1, op2) => 1128 { 1129 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); 1130 }); 1131 } 1132 } 1133 1134 public static void Fminp_V(ArmEmitterContext context) 1135 { 1136 if (Optimizations.UseAdvSimd) 1137 { 1138 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV); 1139 } 1140 else if (Optimizations.FastFP && Optimizations.UseSse41) 1141 { 1142 EmitSse2VectorPairwiseOpF(context, (op1, op2) => 1143 { 1144 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 1145 { 1146 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); 1147 }, scalar: false, op1, op2); 1148 }); 1149 } 1150 else 1151 { 1152 EmitVectorPairwiseOpF(context, (op1, op2) => 1153 { 1154 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); 1155 }); 1156 } 1157 } 1158 1159 public static void Fminv_V(ArmEmitterContext context) 1160 { 1161 if (Optimizations.UseAdvSimd) 1162 { 1163 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV); 1164 } 1165 else if (Optimizations.FastFP && Optimizations.UseSse41) 1166 { 1167 EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => 1168 { 1169 return EmitSse41ProcessNaNsOpF(context, (op1, op2) => 1170 { 1171 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); 1172 }, scalar: false, op1, op2); 1173 }); 1174 } 1175 else 1176 { 1177 EmitVectorAcrossVectorOpF(context, (op1, op2) => 1178 { 1179 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); 1180 }); 1181 } 1182 } 1183 1184 public static void Fmla_Se(ArmEmitterContext context) // Fused. 1185 { 1186 if (Optimizations.UseAdvSimd) 1187 { 1188 InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe); 1189 } 1190 else if (Optimizations.UseFma) 1191 { 1192 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; 1193 1194 Operand d = GetVec(op.Rd); 1195 Operand n = GetVec(op.Rn); 1196 Operand m = GetVec(op.Rm); 1197 1198 int sizeF = op.Size & 1; 1199 1200 if (sizeF == 0) 1201 { 1202 int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; 1203 1204 Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); 1205 1206 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res); 1207 1208 context.Copy(d, context.VectorZeroUpper96(res)); 1209 } 1210 else /* if (sizeF == 1) */ 1211 { 1212 int shuffleMask = op.Index | op.Index << 1; 1213 1214 Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); 1215 1216 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res); 1217 1218 context.Copy(d, context.VectorZeroUpper64(res)); 1219 } 1220 } 1221 else 1222 { 1223 EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => 1224 { 1225 return context.Add(op1, context.Multiply(op2, op3)); 1226 }); 1227 } 1228 } 1229 1230 public static void Fmla_V(ArmEmitterContext context) // Fused. 1231 { 1232 if (Optimizations.UseAdvSimd) 1233 { 1234 InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV); 1235 } 1236 else if (Optimizations.FastFP && Optimizations.UseSse2) 1237 { 1238 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1239 1240 Operand d = GetVec(op.Rd); 1241 Operand n = GetVec(op.Rn); 1242 Operand m = GetVec(op.Rm); 1243 1244 int sizeF = op.Size & 1; 1245 1246 Operand res; 1247 1248 if (sizeF == 0) 1249 { 1250 if (Optimizations.UseFma) 1251 { 1252 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m); 1253 } 1254 else 1255 { 1256 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); 1257 res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); 1258 } 1259 1260 if (op.RegisterSize == RegisterSize.Simd64) 1261 { 1262 res = context.VectorZeroUpper64(res); 1263 } 1264 1265 context.Copy(d, res); 1266 } 1267 else /* if (sizeF == 1) */ 1268 { 1269 if (Optimizations.UseFma) 1270 { 1271 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m); 1272 } 1273 else 1274 { 1275 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); 1276 res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); 1277 } 1278 1279 context.Copy(d, res); 1280 } 1281 } 1282 else 1283 { 1284 EmitVectorTernaryOpF(context, (op1, op2, op3) => 1285 { 1286 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); 1287 }); 1288 } 1289 } 1290 1291 public static void Fmla_Ve(ArmEmitterContext context) // Fused. 1292 { 1293 if (Optimizations.UseAdvSimd) 1294 { 1295 InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe); 1296 } 1297 else if (Optimizations.FastFP && Optimizations.UseSse2) 1298 { 1299 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; 1300 1301 Operand d = GetVec(op.Rd); 1302 Operand n = GetVec(op.Rn); 1303 Operand m = GetVec(op.Rm); 1304 1305 int sizeF = op.Size & 1; 1306 1307 if (sizeF == 0) 1308 { 1309 int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; 1310 1311 Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); 1312 1313 if (Optimizations.UseFma) 1314 { 1315 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res); 1316 } 1317 else 1318 { 1319 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); 1320 res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); 1321 } 1322 1323 if (op.RegisterSize == RegisterSize.Simd64) 1324 { 1325 res = context.VectorZeroUpper64(res); 1326 } 1327 1328 context.Copy(d, res); 1329 } 1330 else /* if (sizeF == 1) */ 1331 { 1332 int shuffleMask = op.Index | op.Index << 1; 1333 1334 Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); 1335 1336 if (Optimizations.UseFma) 1337 { 1338 res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res); 1339 } 1340 else 1341 { 1342 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); 1343 res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); 1344 } 1345 1346 context.Copy(d, res); 1347 } 1348 } 1349 else 1350 { 1351 EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => 1352 { 1353 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); 1354 }); 1355 } 1356 } 1357 1358 public static void Fmls_Se(ArmEmitterContext context) // Fused. 1359 { 1360 if (Optimizations.UseAdvSimd) 1361 { 1362 InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe); 1363 } 1364 else if (Optimizations.UseFma) 1365 { 1366 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; 1367 1368 Operand d = GetVec(op.Rd); 1369 Operand n = GetVec(op.Rn); 1370 Operand m = GetVec(op.Rm); 1371 1372 int sizeF = op.Size & 1; 1373 1374 if (sizeF == 0) 1375 { 1376 int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; 1377 1378 Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); 1379 1380 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res); 1381 1382 context.Copy(d, context.VectorZeroUpper96(res)); 1383 } 1384 else /* if (sizeF == 1) */ 1385 { 1386 int shuffleMask = op.Index | op.Index << 1; 1387 1388 Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); 1389 1390 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res); 1391 1392 context.Copy(d, context.VectorZeroUpper64(res)); 1393 } 1394 } 1395 else 1396 { 1397 EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => 1398 { 1399 return context.Subtract(op1, context.Multiply(op2, op3)); 1400 }); 1401 } 1402 } 1403 1404 public static void Fmls_V(ArmEmitterContext context) // Fused. 1405 { 1406 if (Optimizations.UseAdvSimd) 1407 { 1408 InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV); 1409 } 1410 else if (Optimizations.FastFP && Optimizations.UseSse2) 1411 { 1412 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1413 1414 Operand d = GetVec(op.Rd); 1415 Operand n = GetVec(op.Rn); 1416 Operand m = GetVec(op.Rm); 1417 1418 int sizeF = op.Size & 1; 1419 1420 Operand res; 1421 1422 if (sizeF == 0) 1423 { 1424 if (Optimizations.UseFma) 1425 { 1426 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m); 1427 } 1428 else 1429 { 1430 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); 1431 res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); 1432 } 1433 1434 if (op.RegisterSize == RegisterSize.Simd64) 1435 { 1436 res = context.VectorZeroUpper64(res); 1437 } 1438 1439 context.Copy(d, res); 1440 } 1441 else /* if (sizeF == 1) */ 1442 { 1443 if (Optimizations.UseFma) 1444 { 1445 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m); 1446 } 1447 else 1448 { 1449 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); 1450 res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); 1451 } 1452 1453 context.Copy(d, res); 1454 } 1455 } 1456 else 1457 { 1458 EmitVectorTernaryOpF(context, (op1, op2, op3) => 1459 { 1460 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); 1461 }); 1462 } 1463 } 1464 1465 public static void Fmls_Ve(ArmEmitterContext context) // Fused. 1466 { 1467 if (Optimizations.UseAdvSimd) 1468 { 1469 InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe); 1470 } 1471 else if (Optimizations.FastFP && Optimizations.UseSse2) 1472 { 1473 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; 1474 1475 Operand d = GetVec(op.Rd); 1476 Operand n = GetVec(op.Rn); 1477 Operand m = GetVec(op.Rm); 1478 1479 int sizeF = op.Size & 1; 1480 1481 if (sizeF == 0) 1482 { 1483 int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; 1484 1485 Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); 1486 1487 if (Optimizations.UseFma) 1488 { 1489 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res); 1490 } 1491 else 1492 { 1493 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); 1494 res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); 1495 } 1496 1497 if (op.RegisterSize == RegisterSize.Simd64) 1498 { 1499 res = context.VectorZeroUpper64(res); 1500 } 1501 1502 context.Copy(d, res); 1503 } 1504 else /* if (sizeF == 1) */ 1505 { 1506 int shuffleMask = op.Index | op.Index << 1; 1507 1508 Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); 1509 1510 if (Optimizations.UseFma) 1511 { 1512 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res); 1513 } 1514 else 1515 { 1516 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); 1517 res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); 1518 } 1519 1520 context.Copy(d, res); 1521 } 1522 } 1523 else 1524 { 1525 EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => 1526 { 1527 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); 1528 }); 1529 } 1530 } 1531 1532 public static void Fmsub_S(ArmEmitterContext context) // Fused. 1533 { 1534 if (Optimizations.UseAdvSimd) 1535 { 1536 InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS); 1537 } 1538 else if (Optimizations.FastFP && Optimizations.UseSse2) 1539 { 1540 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1541 1542 Operand d = GetVec(op.Rd); 1543 Operand a = GetVec(op.Ra); 1544 Operand n = GetVec(op.Rn); 1545 Operand m = GetVec(op.Rm); 1546 1547 Operand res; 1548 1549 if (op.Size == 0) 1550 { 1551 if (Optimizations.UseFma) 1552 { 1553 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m); 1554 } 1555 else 1556 { 1557 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 1558 res = context.AddIntrinsic(Intrinsic.X86Subss, a, res); 1559 } 1560 1561 context.Copy(d, context.VectorZeroUpper96(res)); 1562 } 1563 else /* if (op.Size == 1) */ 1564 { 1565 if (Optimizations.UseFma) 1566 { 1567 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m); 1568 } 1569 else 1570 { 1571 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 1572 res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res); 1573 } 1574 1575 context.Copy(d, context.VectorZeroUpper64(res)); 1576 } 1577 } 1578 else 1579 { 1580 EmitScalarTernaryRaOpF(context, (op1, op2, op3) => 1581 { 1582 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); 1583 }); 1584 } 1585 } 1586 1587 public static void Fmul_S(ArmEmitterContext context) 1588 { 1589 if (Optimizations.UseAdvSimd) 1590 { 1591 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS); 1592 } 1593 else if (Optimizations.FastFP && Optimizations.UseSse2) 1594 { 1595 EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); 1596 } 1597 else if (Optimizations.FastFP) 1598 { 1599 EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); 1600 } 1601 else 1602 { 1603 EmitScalarBinaryOpF(context, (op1, op2) => 1604 { 1605 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); 1606 }); 1607 } 1608 } 1609 1610 public static void Fmul_Se(ArmEmitterContext context) 1611 { 1612 if (Optimizations.UseAdvSimd) 1613 { 1614 InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe); 1615 } 1616 else 1617 { 1618 EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); 1619 } 1620 } 1621 1622 public static void Fmul_V(ArmEmitterContext context) 1623 { 1624 if (Optimizations.UseAdvSimd) 1625 { 1626 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV); 1627 } 1628 else if (Optimizations.FastFP && Optimizations.UseSse2) 1629 { 1630 EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); 1631 } 1632 else if (Optimizations.FastFP) 1633 { 1634 EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); 1635 } 1636 else 1637 { 1638 EmitVectorBinaryOpF(context, (op1, op2) => 1639 { 1640 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); 1641 }); 1642 } 1643 } 1644 1645 public static void Fmul_Ve(ArmEmitterContext context) 1646 { 1647 if (Optimizations.UseAdvSimd) 1648 { 1649 InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe); 1650 } 1651 else if (Optimizations.FastFP && Optimizations.UseSse2) 1652 { 1653 OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; 1654 1655 Operand n = GetVec(op.Rn); 1656 Operand m = GetVec(op.Rm); 1657 1658 int sizeF = op.Size & 1; 1659 1660 if (sizeF == 0) 1661 { 1662 int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; 1663 1664 Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); 1665 1666 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); 1667 1668 if (op.RegisterSize == RegisterSize.Simd64) 1669 { 1670 res = context.VectorZeroUpper64(res); 1671 } 1672 1673 context.Copy(GetVec(op.Rd), res); 1674 } 1675 else /* if (sizeF == 1) */ 1676 { 1677 int shuffleMask = op.Index | op.Index << 1; 1678 1679 Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); 1680 1681 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); 1682 1683 context.Copy(GetVec(op.Rd), res); 1684 } 1685 } 1686 else if (Optimizations.FastFP) 1687 { 1688 EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); 1689 } 1690 else 1691 { 1692 EmitVectorBinaryOpByElemF(context, (op1, op2) => 1693 { 1694 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); 1695 }); 1696 } 1697 } 1698 1699 public static void Fmulx_S(ArmEmitterContext context) 1700 { 1701 if (Optimizations.UseAdvSimd) 1702 { 1703 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS); 1704 } 1705 else 1706 { 1707 EmitScalarBinaryOpF(context, (op1, op2) => 1708 { 1709 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); 1710 }); 1711 } 1712 } 1713 1714 public static void Fmulx_Se(ArmEmitterContext context) 1715 { 1716 if (Optimizations.UseAdvSimd) 1717 { 1718 InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe); 1719 } 1720 else 1721 { 1722 EmitScalarBinaryOpByElemF(context, (op1, op2) => 1723 { 1724 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); 1725 }); 1726 } 1727 } 1728 1729 public static void Fmulx_V(ArmEmitterContext context) 1730 { 1731 if (Optimizations.UseAdvSimd) 1732 { 1733 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV); 1734 } 1735 else 1736 { 1737 EmitVectorBinaryOpF(context, (op1, op2) => 1738 { 1739 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); 1740 }); 1741 } 1742 } 1743 1744 public static void Fmulx_Ve(ArmEmitterContext context) 1745 { 1746 if (Optimizations.UseAdvSimd) 1747 { 1748 InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe); 1749 } 1750 else 1751 { 1752 EmitVectorBinaryOpByElemF(context, (op1, op2) => 1753 { 1754 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); 1755 }); 1756 } 1757 } 1758 1759 public static void Fneg_S(ArmEmitterContext context) 1760 { 1761 if (Optimizations.UseAdvSimd) 1762 { 1763 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS); 1764 } 1765 else if (Optimizations.UseSse2) 1766 { 1767 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1768 1769 if (op.Size == 0) 1770 { 1771 Operand mask = X86GetScalar(context, -0f); 1772 1773 Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); 1774 1775 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 1776 } 1777 else /* if (op.Size == 1) */ 1778 { 1779 Operand mask = X86GetScalar(context, -0d); 1780 1781 Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); 1782 1783 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 1784 } 1785 } 1786 else 1787 { 1788 EmitScalarUnaryOpF(context, (op1) => context.Negate(op1)); 1789 } 1790 } 1791 1792 public static void Fneg_V(ArmEmitterContext context) 1793 { 1794 if (Optimizations.UseAdvSimd) 1795 { 1796 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV); 1797 } 1798 else if (Optimizations.UseSse2) 1799 { 1800 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1801 1802 int sizeF = op.Size & 1; 1803 1804 if (sizeF == 0) 1805 { 1806 Operand mask = X86GetAllElements(context, -0f); 1807 1808 Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); 1809 1810 if (op.RegisterSize == RegisterSize.Simd64) 1811 { 1812 res = context.VectorZeroUpper64(res); 1813 } 1814 1815 context.Copy(GetVec(op.Rd), res); 1816 } 1817 else /* if (sizeF == 1) */ 1818 { 1819 Operand mask = X86GetAllElements(context, -0d); 1820 1821 Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); 1822 1823 context.Copy(GetVec(op.Rd), res); 1824 } 1825 } 1826 else 1827 { 1828 EmitVectorUnaryOpF(context, (op1) => context.Negate(op1)); 1829 } 1830 } 1831 1832 public static void Fnmadd_S(ArmEmitterContext context) // Fused. 1833 { 1834 if (Optimizations.UseAdvSimd) 1835 { 1836 InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS); 1837 } 1838 else if (Optimizations.FastFP && Optimizations.UseSse2) 1839 { 1840 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1841 1842 Operand d = GetVec(op.Rd); 1843 Operand a = GetVec(op.Ra); 1844 Operand n = GetVec(op.Rn); 1845 Operand m = GetVec(op.Rm); 1846 1847 Operand res; 1848 1849 if (op.Size == 0) 1850 { 1851 if (Optimizations.UseFma) 1852 { 1853 res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m); 1854 } 1855 else 1856 { 1857 Operand mask = X86GetScalar(context, -0f); 1858 Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a); 1859 1860 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 1861 res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res); 1862 } 1863 1864 context.Copy(d, context.VectorZeroUpper96(res)); 1865 } 1866 else /* if (op.Size == 1) */ 1867 { 1868 if (Optimizations.UseFma) 1869 { 1870 res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m); 1871 } 1872 else 1873 { 1874 Operand mask = X86GetScalar(context, -0d); 1875 Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a); 1876 1877 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 1878 res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res); 1879 } 1880 1881 context.Copy(d, context.VectorZeroUpper64(res)); 1882 } 1883 } 1884 else 1885 { 1886 EmitScalarTernaryRaOpF(context, (op1, op2, op3) => 1887 { 1888 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3); 1889 }); 1890 } 1891 } 1892 1893 public static void Fnmsub_S(ArmEmitterContext context) // Fused. 1894 { 1895 if (Optimizations.UseAdvSimd) 1896 { 1897 InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS); 1898 } 1899 else if (Optimizations.FastFP && Optimizations.UseSse2) 1900 { 1901 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1902 1903 Operand d = GetVec(op.Rd); 1904 Operand a = GetVec(op.Ra); 1905 Operand n = GetVec(op.Rn); 1906 Operand m = GetVec(op.Rm); 1907 1908 Operand res; 1909 1910 if (op.Size == 0) 1911 { 1912 if (Optimizations.UseFma) 1913 { 1914 res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m); 1915 } 1916 else 1917 { 1918 Operand mask = X86GetScalar(context, -0f); 1919 Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a); 1920 1921 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 1922 res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res); 1923 } 1924 1925 context.Copy(d, context.VectorZeroUpper96(res)); 1926 } 1927 else /* if (op.Size == 1) */ 1928 { 1929 if (Optimizations.UseFma) 1930 { 1931 res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m); 1932 } 1933 else 1934 { 1935 Operand mask = X86GetScalar(context, -0d); 1936 Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a); 1937 1938 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 1939 res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res); 1940 } 1941 1942 context.Copy(d, context.VectorZeroUpper64(res)); 1943 } 1944 } 1945 else 1946 { 1947 EmitScalarTernaryRaOpF(context, (op1, op2, op3) => 1948 { 1949 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3); 1950 }); 1951 } 1952 } 1953 1954 public static void Fnmul_S(ArmEmitterContext context) 1955 { 1956 if (Optimizations.UseAdvSimd) 1957 { 1958 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS); 1959 } 1960 else 1961 { 1962 EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); 1963 } 1964 } 1965 1966 public static void Frecpe_S(ArmEmitterContext context) 1967 { 1968 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1969 1970 int sizeF = op.Size & 1; 1971 1972 if (Optimizations.UseAdvSimd) 1973 { 1974 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS); 1975 } 1976 else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) 1977 { 1978 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true); 1979 1980 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 1981 } 1982 else 1983 { 1984 EmitScalarUnaryOpF(context, (op1) => 1985 { 1986 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); 1987 }); 1988 } 1989 } 1990 1991 public static void Frecpe_V(ArmEmitterContext context) 1992 { 1993 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1994 1995 int sizeF = op.Size & 1; 1996 1997 if (Optimizations.UseAdvSimd) 1998 { 1999 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV); 2000 } 2001 else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) 2002 { 2003 Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false); 2004 2005 if (op.RegisterSize == RegisterSize.Simd64) 2006 { 2007 res = context.VectorZeroUpper64(res); 2008 } 2009 2010 context.Copy(GetVec(op.Rd), res); 2011 } 2012 else 2013 { 2014 EmitVectorUnaryOpF(context, (op1) => 2015 { 2016 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); 2017 }); 2018 } 2019 } 2020 2021 public static void Frecps_S(ArmEmitterContext context) // Fused. 2022 { 2023 if (Optimizations.UseAdvSimd) 2024 { 2025 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS); 2026 } 2027 else if (Optimizations.FastFP && Optimizations.UseSse41) 2028 { 2029 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 2030 2031 Operand n = GetVec(op.Rn); 2032 Operand m = GetVec(op.Rm); 2033 2034 int sizeF = op.Size & 1; 2035 2036 Operand res; 2037 2038 if (sizeF == 0) 2039 { 2040 Operand mask = X86GetScalar(context, 2f); 2041 2042 if (Optimizations.UseFma) 2043 { 2044 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m); 2045 } 2046 else 2047 { 2048 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 2049 res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res); 2050 } 2051 2052 res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF); 2053 2054 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 2055 } 2056 else /* if (sizeF == 1) */ 2057 { 2058 Operand mask = X86GetScalar(context, 2d); 2059 2060 if (Optimizations.UseFma) 2061 { 2062 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m); 2063 } 2064 else 2065 { 2066 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 2067 res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res); 2068 } 2069 2070 res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF); 2071 2072 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 2073 } 2074 } 2075 else 2076 { 2077 EmitScalarBinaryOpF(context, (op1, op2) => 2078 { 2079 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); 2080 }); 2081 } 2082 } 2083 2084 public static void Frecps_V(ArmEmitterContext context) // Fused. 2085 { 2086 if (Optimizations.UseAdvSimd) 2087 { 2088 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV); 2089 } 2090 else if (Optimizations.FastFP && Optimizations.UseSse41) 2091 { 2092 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 2093 2094 Operand n = GetVec(op.Rn); 2095 Operand m = GetVec(op.Rm); 2096 2097 int sizeF = op.Size & 1; 2098 2099 Operand res; 2100 2101 if (sizeF == 0) 2102 { 2103 Operand mask = X86GetAllElements(context, 2f); 2104 2105 if (Optimizations.UseFma) 2106 { 2107 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m); 2108 } 2109 else 2110 { 2111 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); 2112 res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res); 2113 } 2114 2115 res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF); 2116 2117 if (op.RegisterSize == RegisterSize.Simd64) 2118 { 2119 res = context.VectorZeroUpper64(res); 2120 } 2121 2122 context.Copy(GetVec(op.Rd), res); 2123 } 2124 else /* if (sizeF == 1) */ 2125 { 2126 Operand mask = X86GetAllElements(context, 2d); 2127 2128 if (Optimizations.UseFma) 2129 { 2130 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m); 2131 } 2132 else 2133 { 2134 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); 2135 res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res); 2136 } 2137 2138 res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF); 2139 2140 context.Copy(GetVec(op.Rd), res); 2141 } 2142 } 2143 else 2144 { 2145 EmitVectorBinaryOpF(context, (op1, op2) => 2146 { 2147 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); 2148 }); 2149 } 2150 } 2151 2152 public static void Frecpx_S(ArmEmitterContext context) 2153 { 2154 if (Optimizations.UseAdvSimd) 2155 { 2156 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpxS); 2157 } 2158 else 2159 { 2160 EmitScalarUnaryOpF(context, (op1) => 2161 { 2162 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); 2163 }); 2164 } 2165 } 2166 2167 public static void Frinta_S(ArmEmitterContext context) 2168 { 2169 if (Optimizations.UseAdvSimd) 2170 { 2171 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS); 2172 } 2173 else if (Optimizations.UseSse41) 2174 { 2175 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway); 2176 } 2177 else 2178 { 2179 EmitScalarUnaryOpF(context, (op1) => 2180 { 2181 return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); 2182 }); 2183 } 2184 } 2185 2186 public static void Frinta_V(ArmEmitterContext context) 2187 { 2188 if (Optimizations.UseAdvSimd) 2189 { 2190 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV); 2191 } 2192 else if (Optimizations.UseSse41) 2193 { 2194 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway); 2195 } 2196 else 2197 { 2198 EmitVectorUnaryOpF(context, (op1) => 2199 { 2200 return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); 2201 }); 2202 } 2203 } 2204 2205 public static void Frinti_S(ArmEmitterContext context) 2206 { 2207 if (Optimizations.UseAdvSimd) 2208 { 2209 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS); 2210 } 2211 else 2212 { 2213 EmitScalarUnaryOpF(context, (op1) => 2214 { 2215 return EmitRoundByRMode(context, op1); 2216 }); 2217 } 2218 } 2219 2220 public static void Frinti_V(ArmEmitterContext context) 2221 { 2222 if (Optimizations.UseAdvSimd) 2223 { 2224 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV); 2225 } 2226 else 2227 { 2228 EmitVectorUnaryOpF(context, (op1) => 2229 { 2230 return EmitRoundByRMode(context, op1); 2231 }); 2232 } 2233 } 2234 2235 public static void Frintm_S(ArmEmitterContext context) 2236 { 2237 if (Optimizations.UseAdvSimd) 2238 { 2239 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS); 2240 } 2241 else if (Optimizations.UseSse41) 2242 { 2243 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); 2244 } 2245 else 2246 { 2247 EmitScalarUnaryOpF(context, (op1) => 2248 { 2249 return EmitUnaryMathCall(context, nameof(Math.Floor), op1); 2250 }); 2251 } 2252 } 2253 2254 public static void Frintm_V(ArmEmitterContext context) 2255 { 2256 if (Optimizations.UseAdvSimd) 2257 { 2258 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV); 2259 } 2260 else if (Optimizations.UseSse41) 2261 { 2262 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); 2263 } 2264 else 2265 { 2266 EmitVectorUnaryOpF(context, (op1) => 2267 { 2268 return EmitUnaryMathCall(context, nameof(Math.Floor), op1); 2269 }); 2270 } 2271 } 2272 2273 public static void Frintn_S(ArmEmitterContext context) 2274 { 2275 if (Optimizations.UseAdvSimd) 2276 { 2277 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS); 2278 } 2279 else if (Optimizations.UseSse41) 2280 { 2281 EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest); 2282 } 2283 else 2284 { 2285 EmitScalarUnaryOpF(context, (op1) => 2286 { 2287 return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); 2288 }); 2289 } 2290 } 2291 2292 public static void Frintn_V(ArmEmitterContext context) 2293 { 2294 if (Optimizations.UseAdvSimd) 2295 { 2296 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV); 2297 } 2298 else if (Optimizations.UseSse41) 2299 { 2300 EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest); 2301 } 2302 else 2303 { 2304 EmitVectorUnaryOpF(context, (op1) => 2305 { 2306 return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); 2307 }); 2308 } 2309 } 2310 2311 public static void Frintp_S(ArmEmitterContext context) 2312 { 2313 if (Optimizations.UseAdvSimd) 2314 { 2315 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS); 2316 } 2317 else if (Optimizations.UseSse41) 2318 { 2319 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); 2320 } 2321 else 2322 { 2323 EmitScalarUnaryOpF(context, (op1) => 2324 { 2325 return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); 2326 }); 2327 } 2328 } 2329 2330 public static void Frintp_V(ArmEmitterContext context) 2331 { 2332 if (Optimizations.UseAdvSimd) 2333 { 2334 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV); 2335 } 2336 else if (Optimizations.UseSse41) 2337 { 2338 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); 2339 } 2340 else 2341 { 2342 EmitVectorUnaryOpF(context, (op1) => 2343 { 2344 return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); 2345 }); 2346 } 2347 } 2348 2349 public static void Frintx_S(ArmEmitterContext context) 2350 { 2351 if (Optimizations.UseAdvSimd) 2352 { 2353 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS); 2354 } 2355 else 2356 { 2357 EmitScalarUnaryOpF(context, (op1) => 2358 { 2359 return EmitRoundByRMode(context, op1); 2360 }); 2361 } 2362 } 2363 2364 public static void Frintx_V(ArmEmitterContext context) 2365 { 2366 if (Optimizations.UseAdvSimd) 2367 { 2368 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV); 2369 } 2370 else 2371 { 2372 EmitVectorUnaryOpF(context, (op1) => 2373 { 2374 return EmitRoundByRMode(context, op1); 2375 }); 2376 } 2377 } 2378 2379 public static void Frintz_S(ArmEmitterContext context) 2380 { 2381 if (Optimizations.UseAdvSimd) 2382 { 2383 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS); 2384 } 2385 else if (Optimizations.UseSse41) 2386 { 2387 EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero); 2388 } 2389 else 2390 { 2391 EmitScalarUnaryOpF(context, (op1) => 2392 { 2393 return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); 2394 }); 2395 } 2396 } 2397 2398 public static void Frintz_V(ArmEmitterContext context) 2399 { 2400 if (Optimizations.UseAdvSimd) 2401 { 2402 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV); 2403 } 2404 else if (Optimizations.UseSse41) 2405 { 2406 EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero); 2407 } 2408 else 2409 { 2410 EmitVectorUnaryOpF(context, (op1) => 2411 { 2412 return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); 2413 }); 2414 } 2415 } 2416 2417 public static void Frsqrte_S(ArmEmitterContext context) 2418 { 2419 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 2420 2421 int sizeF = op.Size & 1; 2422 2423 if (Optimizations.UseAdvSimd) 2424 { 2425 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS); 2426 } 2427 else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) 2428 { 2429 // RSQRTSS handles subnormals as zero, which differs from Arm, so we can't use it here. 2430 2431 Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtss, GetVec(op.Rn)); 2432 res = context.AddIntrinsic(Intrinsic.X86Rcpss, res); 2433 res = EmitSse41Round32Exp8OpF(context, res, scalar: true); 2434 2435 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 2436 } 2437 else 2438 { 2439 EmitScalarUnaryOpF(context, (op1) => 2440 { 2441 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); 2442 }); 2443 } 2444 } 2445 2446 public static void Frsqrte_V(ArmEmitterContext context) 2447 { 2448 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 2449 2450 int sizeF = op.Size & 1; 2451 2452 if (Optimizations.UseAdvSimd) 2453 { 2454 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV); 2455 } 2456 else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) 2457 { 2458 // RSQRTPS handles subnormals as zero, which differs from Arm, so we can't use it here. 2459 2460 Operand res = context.AddIntrinsic(Intrinsic.X86Sqrtps, GetVec(op.Rn)); 2461 res = context.AddIntrinsic(Intrinsic.X86Rcpps, res); 2462 res = EmitSse41Round32Exp8OpF(context, res, scalar: false); 2463 2464 if (op.RegisterSize == RegisterSize.Simd64) 2465 { 2466 res = context.VectorZeroUpper64(res); 2467 } 2468 2469 context.Copy(GetVec(op.Rd), res); 2470 } 2471 else 2472 { 2473 EmitVectorUnaryOpF(context, (op1) => 2474 { 2475 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); 2476 }); 2477 } 2478 } 2479 2480 public static void Frsqrts_S(ArmEmitterContext context) // Fused. 2481 { 2482 if (Optimizations.UseAdvSimd) 2483 { 2484 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS); 2485 } 2486 else if (Optimizations.FastFP && Optimizations.UseSse41) 2487 { 2488 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 2489 2490 Operand n = GetVec(op.Rn); 2491 Operand m = GetVec(op.Rm); 2492 2493 int sizeF = op.Size & 1; 2494 2495 Operand res; 2496 2497 if (sizeF == 0) 2498 { 2499 Operand maskHalf = X86GetScalar(context, 0.5f); 2500 Operand maskThree = X86GetScalar(context, 3f); 2501 Operand maskOneHalf = X86GetScalar(context, 1.5f); 2502 2503 if (Optimizations.UseFma) 2504 { 2505 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m); 2506 } 2507 else 2508 { 2509 res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); 2510 res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res); 2511 } 2512 2513 res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res); 2514 res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF); 2515 2516 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 2517 } 2518 else /* if (sizeF == 1) */ 2519 { 2520 Operand maskHalf = X86GetScalar(context, 0.5d); 2521 Operand maskThree = X86GetScalar(context, 3d); 2522 Operand maskOneHalf = X86GetScalar(context, 1.5d); 2523 2524 if (Optimizations.UseFma) 2525 { 2526 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m); 2527 } 2528 else 2529 { 2530 res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); 2531 res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res); 2532 } 2533 2534 res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res); 2535 res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF); 2536 2537 context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); 2538 } 2539 } 2540 else 2541 { 2542 EmitScalarBinaryOpF(context, (op1, op2) => 2543 { 2544 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); 2545 }); 2546 } 2547 } 2548 2549 public static void Frsqrts_V(ArmEmitterContext context) // Fused. 2550 { 2551 if (Optimizations.UseAdvSimd) 2552 { 2553 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV); 2554 } 2555 else if (Optimizations.FastFP && Optimizations.UseSse41) 2556 { 2557 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 2558 2559 Operand n = GetVec(op.Rn); 2560 Operand m = GetVec(op.Rm); 2561 2562 int sizeF = op.Size & 1; 2563 2564 Operand res; 2565 2566 if (sizeF == 0) 2567 { 2568 Operand maskHalf = X86GetAllElements(context, 0.5f); 2569 Operand maskThree = X86GetAllElements(context, 3f); 2570 Operand maskOneHalf = X86GetAllElements(context, 1.5f); 2571 2572 if (Optimizations.UseFma) 2573 { 2574 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m); 2575 } 2576 else 2577 { 2578 res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); 2579 res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); 2580 } 2581 2582 res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); 2583 res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); 2584 2585 if (op.RegisterSize == RegisterSize.Simd64) 2586 { 2587 res = context.VectorZeroUpper64(res); 2588 } 2589 2590 context.Copy(GetVec(op.Rd), res); 2591 } 2592 else /* if (sizeF == 1) */ 2593 { 2594 Operand maskHalf = X86GetAllElements(context, 0.5d); 2595 Operand maskThree = X86GetAllElements(context, 3d); 2596 Operand maskOneHalf = X86GetAllElements(context, 1.5d); 2597 2598 if (Optimizations.UseFma) 2599 { 2600 res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m); 2601 } 2602 else 2603 { 2604 res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); 2605 res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); 2606 } 2607 2608 res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); 2609 res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); 2610 2611 context.Copy(GetVec(op.Rd), res); 2612 } 2613 } 2614 else 2615 { 2616 EmitVectorBinaryOpF(context, (op1, op2) => 2617 { 2618 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); 2619 }); 2620 } 2621 } 2622 2623 public static void Fsqrt_S(ArmEmitterContext context) 2624 { 2625 if (Optimizations.UseAdvSimd) 2626 { 2627 InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS); 2628 } 2629 else if (Optimizations.FastFP && Optimizations.UseSse2) 2630 { 2631 EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); 2632 } 2633 else 2634 { 2635 EmitScalarUnaryOpF(context, (op1) => 2636 { 2637 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); 2638 }); 2639 } 2640 } 2641 2642 public static void Fsqrt_V(ArmEmitterContext context) 2643 { 2644 if (Optimizations.UseAdvSimd) 2645 { 2646 InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV); 2647 } 2648 else if (Optimizations.FastFP && Optimizations.UseSse2) 2649 { 2650 EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); 2651 } 2652 else 2653 { 2654 EmitVectorUnaryOpF(context, (op1) => 2655 { 2656 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); 2657 }); 2658 } 2659 } 2660 2661 public static void Fsub_S(ArmEmitterContext context) 2662 { 2663 if (Optimizations.UseAdvSimd) 2664 { 2665 InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS); 2666 } 2667 else if (Optimizations.FastFP && Optimizations.UseSse2) 2668 { 2669 EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); 2670 } 2671 else if (Optimizations.FastFP) 2672 { 2673 EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); 2674 } 2675 else 2676 { 2677 EmitScalarBinaryOpF(context, (op1, op2) => 2678 { 2679 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); 2680 }); 2681 } 2682 } 2683 2684 public static void Fsub_V(ArmEmitterContext context) 2685 { 2686 if (Optimizations.UseAdvSimd) 2687 { 2688 InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FsubV); 2689 } 2690 else if (Optimizations.FastFP && Optimizations.UseSse2) 2691 { 2692 EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); 2693 } 2694 else if (Optimizations.FastFP) 2695 { 2696 EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); 2697 } 2698 else 2699 { 2700 EmitVectorBinaryOpF(context, (op1, op2) => 2701 { 2702 return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); 2703 }); 2704 } 2705 } 2706 2707 public static void Mla_V(ArmEmitterContext context) 2708 { 2709 if (Optimizations.UseAdvSimd) 2710 { 2711 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV); 2712 } 2713 else if (Optimizations.UseSse41) 2714 { 2715 EmitSse41VectorMul_AddSub(context, AddSub.Add); 2716 } 2717 else 2718 { 2719 EmitVectorTernaryOpZx(context, (op1, op2, op3) => 2720 { 2721 return context.Add(op1, context.Multiply(op2, op3)); 2722 }); 2723 } 2724 } 2725 2726 public static void Mla_Ve(ArmEmitterContext context) 2727 { 2728 if (Optimizations.UseAdvSimd) 2729 { 2730 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe); 2731 } 2732 else 2733 { 2734 EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => 2735 { 2736 return context.Add(op1, context.Multiply(op2, op3)); 2737 }); 2738 } 2739 } 2740 2741 public static void Mls_V(ArmEmitterContext context) 2742 { 2743 if (Optimizations.UseAdvSimd) 2744 { 2745 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV); 2746 } 2747 else if (Optimizations.UseSse41) 2748 { 2749 EmitSse41VectorMul_AddSub(context, AddSub.Subtract); 2750 } 2751 else 2752 { 2753 EmitVectorTernaryOpZx(context, (op1, op2, op3) => 2754 { 2755 return context.Subtract(op1, context.Multiply(op2, op3)); 2756 }); 2757 } 2758 } 2759 2760 public static void Mls_Ve(ArmEmitterContext context) 2761 { 2762 if (Optimizations.UseAdvSimd) 2763 { 2764 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe); 2765 } 2766 else 2767 { 2768 EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => 2769 { 2770 return context.Subtract(op1, context.Multiply(op2, op3)); 2771 }); 2772 } 2773 } 2774 2775 public static void Mul_V(ArmEmitterContext context) 2776 { 2777 if (Optimizations.UseAdvSimd) 2778 { 2779 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV); 2780 } 2781 else if (Optimizations.UseSse41) 2782 { 2783 EmitSse41VectorMul_AddSub(context, AddSub.None); 2784 } 2785 else 2786 { 2787 EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); 2788 } 2789 } 2790 2791 public static void Mul_Ve(ArmEmitterContext context) 2792 { 2793 if (Optimizations.UseAdvSimd) 2794 { 2795 InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe); 2796 } 2797 else 2798 { 2799 EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); 2800 } 2801 } 2802 2803 public static void Neg_S(ArmEmitterContext context) 2804 { 2805 if (Optimizations.UseAdvSimd) 2806 { 2807 InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS); 2808 } 2809 else 2810 { 2811 EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); 2812 } 2813 } 2814 2815 public static void Neg_V(ArmEmitterContext context) 2816 { 2817 if (Optimizations.UseAdvSimd) 2818 { 2819 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV); 2820 } 2821 else if (Optimizations.UseSse2) 2822 { 2823 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 2824 2825 Intrinsic subInst = X86PsubInstruction[op.Size]; 2826 2827 Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn)); 2828 2829 if (op.RegisterSize == RegisterSize.Simd64) 2830 { 2831 res = context.VectorZeroUpper64(res); 2832 } 2833 2834 context.Copy(GetVec(op.Rd), res); 2835 } 2836 else 2837 { 2838 EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1)); 2839 } 2840 } 2841 2842 public static void Pmull_V(ArmEmitterContext context) 2843 { 2844 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 2845 2846 if (Optimizations.UseArm64Pmull) 2847 { 2848 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV); 2849 } 2850 else if (Optimizations.UsePclmulqdq && op.Size == 3) 2851 { 2852 Operand n = GetVec(op.Rn); 2853 Operand m = GetVec(op.Rm); 2854 2855 int imm8 = op.RegisterSize == RegisterSize.Simd64 ? 0b0000_0000 : 0b0001_0001; 2856 2857 Operand res = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, n, m, Const(imm8)); 2858 2859 context.Copy(GetVec(op.Rd), res); 2860 } 2861 else if (Optimizations.UseSse41) 2862 { 2863 Operand n = GetVec(op.Rn); 2864 Operand m = GetVec(op.Rm); 2865 2866 if (op.RegisterSize == RegisterSize.Simd64) 2867 { 2868 n = context.VectorZeroUpper64(n); 2869 m = context.VectorZeroUpper64(m); 2870 } 2871 else /* if (op.RegisterSize == RegisterSize.Simd128) */ 2872 { 2873 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 2874 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 2875 } 2876 2877 Operand res = context.VectorZero(); 2878 2879 if (op.Size == 0) 2880 { 2881 n = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, n); 2882 m = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, m); 2883 2884 for (int i = 0; i < 8; i++) 2885 { 2886 Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i)); 2887 mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15)); 2888 2889 Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i)); 2890 tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); 2891 2892 res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); 2893 } 2894 } 2895 else /* if (op.Size == 3) */ 2896 { 2897 Operand zero = context.VectorZero(); 2898 2899 for (int i = 0; i < 64; i++) 2900 { 2901 Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n); 2902 mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i)); 2903 mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63)); 2904 mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask); 2905 2906 Operand tmp = EmitSse2Sll_128(context, m, i); 2907 tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); 2908 2909 res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); 2910 } 2911 } 2912 2913 context.Copy(GetVec(op.Rd), res); 2914 } 2915 else 2916 { 2917 Operand n = GetVec(op.Rn); 2918 Operand m = GetVec(op.Rm); 2919 2920 Operand res; 2921 2922 if (op.Size == 0) 2923 { 2924 res = context.VectorZero(); 2925 2926 int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 8; 2927 2928 for (int index = 0; index < 8; index++) 2929 { 2930 Operand ne = context.VectorExtract8(n, part + index); 2931 Operand me = context.VectorExtract8(m, part + index); 2932 2933 Operand de = EmitPolynomialMultiply(context, ne, me, 8); 2934 2935 res = EmitVectorInsert(context, res, de, index, 1); 2936 } 2937 } 2938 else /* if (op.Size == 3) */ 2939 { 2940 int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 1; 2941 2942 Operand ne = context.VectorExtract(OperandType.I64, n, part); 2943 Operand me = context.VectorExtract(OperandType.I64, m, part); 2944 2945 res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me); 2946 } 2947 2948 context.Copy(GetVec(op.Rd), res); 2949 } 2950 } 2951 2952 public static void Raddhn_V(ArmEmitterContext context) 2953 { 2954 if (Optimizations.UseAdvSimd) 2955 { 2956 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV); 2957 } 2958 else 2959 { 2960 EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); 2961 } 2962 } 2963 2964 public static void Rsubhn_V(ArmEmitterContext context) 2965 { 2966 if (Optimizations.UseAdvSimd) 2967 { 2968 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV); 2969 } 2970 else 2971 { 2972 EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); 2973 } 2974 } 2975 2976 public static void Saba_V(ArmEmitterContext context) 2977 { 2978 if (Optimizations.UseAdvSimd) 2979 { 2980 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV); 2981 } 2982 else 2983 { 2984 EmitVectorTernaryOpSx(context, (op1, op2, op3) => 2985 { 2986 return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); 2987 }); 2988 } 2989 } 2990 2991 public static void Sabal_V(ArmEmitterContext context) 2992 { 2993 if (Optimizations.UseAdvSimd) 2994 { 2995 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV); 2996 } 2997 else 2998 { 2999 EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => 3000 { 3001 return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); 3002 }); 3003 } 3004 } 3005 3006 public static void Sabd_V(ArmEmitterContext context) 3007 { 3008 if (Optimizations.UseAdvSimd) 3009 { 3010 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV); 3011 } 3012 else if (Optimizations.UseSse41) 3013 { 3014 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3015 3016 Operand n = GetVec(op.Rn); 3017 Operand m = GetVec(op.Rm); 3018 3019 EmitSse41VectorSabdOp(context, op, n, m, isLong: false); 3020 } 3021 else 3022 { 3023 EmitVectorBinaryOpSx(context, (op1, op2) => 3024 { 3025 return EmitAbs(context, context.Subtract(op1, op2)); 3026 }); 3027 } 3028 } 3029 3030 public static void Sabdl_V(ArmEmitterContext context) 3031 { 3032 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3033 3034 if (Optimizations.UseAdvSimd) 3035 { 3036 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV); 3037 } 3038 else if (Optimizations.UseSse41 && op.Size < 2) 3039 { 3040 Operand n = GetVec(op.Rn); 3041 Operand m = GetVec(op.Rm); 3042 3043 if (op.RegisterSize == RegisterSize.Simd128) 3044 { 3045 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3046 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3047 } 3048 3049 Intrinsic movInst = op.Size == 0 3050 ? Intrinsic.X86Pmovsxbw 3051 : Intrinsic.X86Pmovsxwd; 3052 3053 n = context.AddIntrinsic(movInst, n); 3054 m = context.AddIntrinsic(movInst, m); 3055 3056 EmitSse41VectorSabdOp(context, op, n, m, isLong: true); 3057 } 3058 else 3059 { 3060 EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => 3061 { 3062 return EmitAbs(context, context.Subtract(op1, op2)); 3063 }); 3064 } 3065 } 3066 3067 public static void Sadalp_V(ArmEmitterContext context) 3068 { 3069 if (Optimizations.UseAdvSimd) 3070 { 3071 InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV); 3072 } 3073 else 3074 { 3075 EmitAddLongPairwise(context, signed: true, accumulate: true); 3076 } 3077 } 3078 3079 public static void Saddl_V(ArmEmitterContext context) 3080 { 3081 if (Optimizations.UseAdvSimd) 3082 { 3083 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV); 3084 } 3085 else if (Optimizations.UseSse41) 3086 { 3087 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3088 3089 Operand n = GetVec(op.Rn); 3090 Operand m = GetVec(op.Rm); 3091 3092 if (op.RegisterSize == RegisterSize.Simd128) 3093 { 3094 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3095 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3096 } 3097 3098 Intrinsic movInst = X86PmovsxInstruction[op.Size]; 3099 3100 n = context.AddIntrinsic(movInst, n); 3101 m = context.AddIntrinsic(movInst, m); 3102 3103 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 3104 3105 context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); 3106 } 3107 else 3108 { 3109 EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); 3110 } 3111 } 3112 3113 public static void Saddlp_V(ArmEmitterContext context) 3114 { 3115 if (Optimizations.UseAdvSimd) 3116 { 3117 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV); 3118 } 3119 else 3120 { 3121 EmitAddLongPairwise(context, signed: true, accumulate: false); 3122 } 3123 } 3124 3125 public static void Saddlv_V(ArmEmitterContext context) 3126 { 3127 if (Optimizations.UseAdvSimd) 3128 { 3129 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV); 3130 } 3131 else 3132 { 3133 EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); 3134 } 3135 } 3136 3137 public static void Saddw_V(ArmEmitterContext context) 3138 { 3139 if (Optimizations.UseAdvSimd) 3140 { 3141 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV); 3142 } 3143 else if (Optimizations.UseSse41) 3144 { 3145 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3146 3147 Operand n = GetVec(op.Rn); 3148 Operand m = GetVec(op.Rm); 3149 3150 if (op.RegisterSize == RegisterSize.Simd128) 3151 { 3152 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3153 } 3154 3155 Intrinsic movInst = X86PmovsxInstruction[op.Size]; 3156 3157 m = context.AddIntrinsic(movInst, m); 3158 3159 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 3160 3161 context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); 3162 } 3163 else 3164 { 3165 EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); 3166 } 3167 } 3168 3169 public static void Shadd_V(ArmEmitterContext context) 3170 { 3171 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3172 3173 if (Optimizations.UseAdvSimd) 3174 { 3175 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV); 3176 } 3177 else if (Optimizations.UseSse2 && op.Size > 0) 3178 { 3179 Operand n = GetVec(op.Rn); 3180 Operand m = GetVec(op.Rm); 3181 3182 Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); 3183 Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); 3184 3185 Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psraw : Intrinsic.X86Psrad; 3186 3187 res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); 3188 3189 Intrinsic addInst = X86PaddInstruction[op.Size]; 3190 3191 res = context.AddIntrinsic(addInst, res, res2); 3192 3193 if (op.RegisterSize == RegisterSize.Simd64) 3194 { 3195 res = context.VectorZeroUpper64(res); 3196 } 3197 3198 context.Copy(GetVec(op.Rd), res); 3199 } 3200 else 3201 { 3202 EmitVectorBinaryOpSx(context, (op1, op2) => 3203 { 3204 return context.ShiftRightSI(context.Add(op1, op2), Const(1)); 3205 }); 3206 } 3207 } 3208 3209 public static void Shsub_V(ArmEmitterContext context) 3210 { 3211 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3212 3213 if (Optimizations.UseAdvSimd) 3214 { 3215 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV); 3216 } 3217 else if (Optimizations.UseSse2 && op.Size < 2) 3218 { 3219 Operand n = GetVec(op.Rn); 3220 Operand m = GetVec(op.Rm); 3221 3222 Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); 3223 3224 Intrinsic addInst = X86PaddInstruction[op.Size]; 3225 3226 Operand nPlusMask = context.AddIntrinsic(addInst, n, mask); 3227 Operand mPlusMask = context.AddIntrinsic(addInst, m, mask); 3228 3229 Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; 3230 3231 Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask); 3232 3233 Intrinsic subInst = X86PsubInstruction[op.Size]; 3234 3235 res = context.AddIntrinsic(subInst, nPlusMask, res); 3236 3237 if (op.RegisterSize == RegisterSize.Simd64) 3238 { 3239 res = context.VectorZeroUpper64(res); 3240 } 3241 3242 context.Copy(GetVec(op.Rd), res); 3243 } 3244 else 3245 { 3246 EmitVectorBinaryOpSx(context, (op1, op2) => 3247 { 3248 return context.ShiftRightSI(context.Subtract(op1, op2), Const(1)); 3249 }); 3250 } 3251 } 3252 3253 public static void Smax_V(ArmEmitterContext context) 3254 { 3255 if (Optimizations.UseAdvSimd) 3256 { 3257 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV); 3258 } 3259 else if (Optimizations.UseSse41) 3260 { 3261 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3262 3263 Operand n = GetVec(op.Rn); 3264 Operand m = GetVec(op.Rm); 3265 3266 Intrinsic maxInst = X86PmaxsInstruction[op.Size]; 3267 3268 Operand res = context.AddIntrinsic(maxInst, n, m); 3269 3270 if (op.RegisterSize == RegisterSize.Simd64) 3271 { 3272 res = context.VectorZeroUpper64(res); 3273 } 3274 3275 context.Copy(GetVec(op.Rd), res); 3276 } 3277 else 3278 { 3279 EmitVectorBinaryOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); 3280 } 3281 } 3282 3283 public static void Smaxp_V(ArmEmitterContext context) 3284 { 3285 if (Optimizations.UseAdvSimd) 3286 { 3287 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV); 3288 } 3289 else if (Optimizations.UseSsse3) 3290 { 3291 EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction); 3292 } 3293 else 3294 { 3295 EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); 3296 } 3297 } 3298 3299 public static void Smaxv_V(ArmEmitterContext context) 3300 { 3301 if (Optimizations.UseAdvSimd) 3302 { 3303 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV); 3304 } 3305 else 3306 { 3307 EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); 3308 } 3309 } 3310 3311 public static void Smin_V(ArmEmitterContext context) 3312 { 3313 if (Optimizations.UseAdvSimd) 3314 { 3315 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV); 3316 } 3317 else if (Optimizations.UseSse41) 3318 { 3319 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3320 3321 Operand n = GetVec(op.Rn); 3322 Operand m = GetVec(op.Rm); 3323 3324 Intrinsic minInst = X86PminsInstruction[op.Size]; 3325 3326 Operand res = context.AddIntrinsic(minInst, n, m); 3327 3328 if (op.RegisterSize == RegisterSize.Simd64) 3329 { 3330 res = context.VectorZeroUpper64(res); 3331 } 3332 3333 context.Copy(GetVec(op.Rd), res); 3334 } 3335 else 3336 { 3337 EmitVectorBinaryOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); 3338 } 3339 } 3340 3341 public static void Sminp_V(ArmEmitterContext context) 3342 { 3343 if (Optimizations.UseAdvSimd) 3344 { 3345 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV); 3346 } 3347 else if (Optimizations.UseSsse3) 3348 { 3349 EmitSsse3VectorPairwiseOp(context, X86PminsInstruction); 3350 } 3351 else 3352 { 3353 EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); 3354 } 3355 } 3356 3357 public static void Sminv_V(ArmEmitterContext context) 3358 { 3359 if (Optimizations.UseAdvSimd) 3360 { 3361 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV); 3362 } 3363 else 3364 { 3365 EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); 3366 } 3367 } 3368 3369 public static void Smlal_V(ArmEmitterContext context) 3370 { 3371 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3372 3373 if (Optimizations.UseAdvSimd) 3374 { 3375 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV); 3376 } 3377 else if (Optimizations.UseSse41 && op.Size < 2) 3378 { 3379 Operand d = GetVec(op.Rd); 3380 Operand n = GetVec(op.Rn); 3381 Operand m = GetVec(op.Rm); 3382 3383 if (op.RegisterSize == RegisterSize.Simd128) 3384 { 3385 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3386 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3387 } 3388 3389 Intrinsic movInst = X86PmovsxInstruction[op.Size]; 3390 3391 n = context.AddIntrinsic(movInst, n); 3392 m = context.AddIntrinsic(movInst, m); 3393 3394 Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; 3395 3396 Operand res = context.AddIntrinsic(mullInst, n, m); 3397 3398 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 3399 3400 context.Copy(d, context.AddIntrinsic(addInst, d, res)); 3401 } 3402 else 3403 { 3404 EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => 3405 { 3406 return context.Add(op1, context.Multiply(op2, op3)); 3407 }); 3408 } 3409 } 3410 3411 public static void Smlal_Ve(ArmEmitterContext context) 3412 { 3413 if (Optimizations.UseAdvSimd) 3414 { 3415 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe); 3416 } 3417 else 3418 { 3419 EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => 3420 { 3421 return context.Add(op1, context.Multiply(op2, op3)); 3422 }); 3423 } 3424 } 3425 3426 public static void Smlsl_V(ArmEmitterContext context) 3427 { 3428 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3429 3430 if (Optimizations.UseAdvSimd) 3431 { 3432 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV); 3433 } 3434 else if (Optimizations.UseSse41 && op.Size < 2) 3435 { 3436 Operand d = GetVec(op.Rd); 3437 Operand n = GetVec(op.Rn); 3438 Operand m = GetVec(op.Rm); 3439 3440 if (op.RegisterSize == RegisterSize.Simd128) 3441 { 3442 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3443 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3444 } 3445 3446 Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd; 3447 3448 n = context.AddIntrinsic(movInst, n); 3449 m = context.AddIntrinsic(movInst, m); 3450 3451 Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; 3452 3453 Operand res = context.AddIntrinsic(mullInst, n, m); 3454 3455 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 3456 3457 context.Copy(d, context.AddIntrinsic(subInst, d, res)); 3458 } 3459 else 3460 { 3461 EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => 3462 { 3463 return context.Subtract(op1, context.Multiply(op2, op3)); 3464 }); 3465 } 3466 } 3467 3468 public static void Smlsl_Ve(ArmEmitterContext context) 3469 { 3470 if (Optimizations.UseAdvSimd) 3471 { 3472 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe); 3473 } 3474 else 3475 { 3476 EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => 3477 { 3478 return context.Subtract(op1, context.Multiply(op2, op3)); 3479 }); 3480 } 3481 } 3482 3483 public static void Smull_V(ArmEmitterContext context) 3484 { 3485 if (Optimizations.UseAdvSimd) 3486 { 3487 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV); 3488 } 3489 else 3490 { 3491 EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); 3492 } 3493 } 3494 3495 public static void Smull_Ve(ArmEmitterContext context) 3496 { 3497 if (Optimizations.UseAdvSimd) 3498 { 3499 InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe); 3500 } 3501 else 3502 { 3503 EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); 3504 } 3505 } 3506 3507 public static void Sqabs_S(ArmEmitterContext context) 3508 { 3509 if (Optimizations.UseAdvSimd) 3510 { 3511 InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS); 3512 } 3513 else 3514 { 3515 EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); 3516 } 3517 } 3518 3519 public static void Sqabs_V(ArmEmitterContext context) 3520 { 3521 if (Optimizations.UseAdvSimd) 3522 { 3523 InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV); 3524 } 3525 else 3526 { 3527 EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); 3528 } 3529 } 3530 3531 public static void Sqadd_S(ArmEmitterContext context) 3532 { 3533 if (Optimizations.UseAdvSimd) 3534 { 3535 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS); 3536 } 3537 else 3538 { 3539 EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); 3540 } 3541 } 3542 3543 public static void Sqadd_V(ArmEmitterContext context) 3544 { 3545 if (Optimizations.UseAdvSimd) 3546 { 3547 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV); 3548 } 3549 else 3550 { 3551 EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); 3552 } 3553 } 3554 3555 public static void Sqdmulh_S(ArmEmitterContext context) 3556 { 3557 if (Optimizations.UseAdvSimd) 3558 { 3559 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS); 3560 } 3561 else 3562 { 3563 EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); 3564 } 3565 } 3566 3567 public static void Sqdmulh_V(ArmEmitterContext context) 3568 { 3569 if (Optimizations.UseAdvSimd) 3570 { 3571 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV); 3572 } 3573 else 3574 { 3575 EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); 3576 } 3577 } 3578 3579 public static void Sqdmulh_Ve(ArmEmitterContext context) 3580 { 3581 if (Optimizations.UseAdvSimd) 3582 { 3583 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe); 3584 } 3585 else 3586 { 3587 EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); 3588 } 3589 } 3590 3591 public static void Sqneg_S(ArmEmitterContext context) 3592 { 3593 if (Optimizations.UseAdvSimd) 3594 { 3595 InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS); 3596 } 3597 else 3598 { 3599 EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); 3600 } 3601 } 3602 3603 public static void Sqneg_V(ArmEmitterContext context) 3604 { 3605 if (Optimizations.UseAdvSimd) 3606 { 3607 InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV); 3608 } 3609 else 3610 { 3611 EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); 3612 } 3613 } 3614 3615 public static void Sqrdmulh_S(ArmEmitterContext context) 3616 { 3617 if (Optimizations.UseAdvSimd) 3618 { 3619 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS); 3620 } 3621 else 3622 { 3623 EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); 3624 } 3625 } 3626 3627 public static void Sqrdmulh_V(ArmEmitterContext context) 3628 { 3629 if (Optimizations.UseAdvSimd) 3630 { 3631 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV); 3632 } 3633 else 3634 { 3635 EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); 3636 } 3637 } 3638 3639 public static void Sqrdmulh_Ve(ArmEmitterContext context) 3640 { 3641 if (Optimizations.UseAdvSimd) 3642 { 3643 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe); 3644 } 3645 else 3646 { 3647 EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); 3648 } 3649 } 3650 3651 public static void Sqsub_S(ArmEmitterContext context) 3652 { 3653 if (Optimizations.UseAdvSimd) 3654 { 3655 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS); 3656 } 3657 else 3658 { 3659 EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); 3660 } 3661 } 3662 3663 public static void Sqsub_V(ArmEmitterContext context) 3664 { 3665 if (Optimizations.UseAdvSimd) 3666 { 3667 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV); 3668 } 3669 else 3670 { 3671 EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); 3672 } 3673 } 3674 3675 public static void Sqxtn_S(ArmEmitterContext context) 3676 { 3677 if (Optimizations.UseAdvSimd) 3678 { 3679 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS); 3680 } 3681 else 3682 { 3683 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); 3684 } 3685 } 3686 3687 public static void Sqxtn_V(ArmEmitterContext context) 3688 { 3689 if (Optimizations.UseAdvSimd) 3690 { 3691 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV); 3692 } 3693 else 3694 { 3695 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); 3696 } 3697 } 3698 3699 public static void Sqxtun_S(ArmEmitterContext context) 3700 { 3701 if (Optimizations.UseAdvSimd) 3702 { 3703 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS); 3704 } 3705 else 3706 { 3707 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); 3708 } 3709 } 3710 3711 public static void Sqxtun_V(ArmEmitterContext context) 3712 { 3713 if (Optimizations.UseAdvSimd) 3714 { 3715 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV); 3716 } 3717 else 3718 { 3719 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); 3720 } 3721 } 3722 3723 public static void Srhadd_V(ArmEmitterContext context) 3724 { 3725 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3726 3727 if (Optimizations.UseAdvSimd) 3728 { 3729 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV); 3730 } 3731 else if (Optimizations.UseSse2 && op.Size < 2) 3732 { 3733 Operand n = GetVec(op.Rn); 3734 Operand m = GetVec(op.Rm); 3735 3736 Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); 3737 3738 Intrinsic subInst = X86PsubInstruction[op.Size]; 3739 3740 Operand nMinusMask = context.AddIntrinsic(subInst, n, mask); 3741 Operand mMinusMask = context.AddIntrinsic(subInst, m, mask); 3742 3743 Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; 3744 3745 Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask); 3746 3747 Intrinsic addInst = X86PaddInstruction[op.Size]; 3748 3749 res = context.AddIntrinsic(addInst, mask, res); 3750 3751 if (op.RegisterSize == RegisterSize.Simd64) 3752 { 3753 res = context.VectorZeroUpper64(res); 3754 } 3755 3756 context.Copy(GetVec(op.Rd), res); 3757 } 3758 else 3759 { 3760 EmitVectorBinaryOpSx(context, (op1, op2) => 3761 { 3762 Operand res = context.Add(op1, op2); 3763 3764 res = context.Add(res, Const(1L)); 3765 3766 return context.ShiftRightSI(res, Const(1)); 3767 }); 3768 } 3769 } 3770 3771 public static void Ssubl_V(ArmEmitterContext context) 3772 { 3773 if (Optimizations.UseAdvSimd) 3774 { 3775 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV); 3776 } 3777 else if (Optimizations.UseSse41) 3778 { 3779 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3780 3781 Operand n = GetVec(op.Rn); 3782 Operand m = GetVec(op.Rm); 3783 3784 if (op.RegisterSize == RegisterSize.Simd128) 3785 { 3786 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3787 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3788 } 3789 3790 Intrinsic movInst = X86PmovsxInstruction[op.Size]; 3791 3792 n = context.AddIntrinsic(movInst, n); 3793 m = context.AddIntrinsic(movInst, m); 3794 3795 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 3796 3797 context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); 3798 } 3799 else 3800 { 3801 EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); 3802 } 3803 } 3804 3805 public static void Ssubw_V(ArmEmitterContext context) 3806 { 3807 if (Optimizations.UseAdvSimd) 3808 { 3809 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV); 3810 } 3811 else if (Optimizations.UseSse41) 3812 { 3813 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3814 3815 Operand n = GetVec(op.Rn); 3816 Operand m = GetVec(op.Rm); 3817 3818 if (op.RegisterSize == RegisterSize.Simd128) 3819 { 3820 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3821 } 3822 3823 Intrinsic movInst = X86PmovsxInstruction[op.Size]; 3824 3825 m = context.AddIntrinsic(movInst, m); 3826 3827 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 3828 3829 context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); 3830 } 3831 else 3832 { 3833 EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); 3834 } 3835 } 3836 3837 public static void Sub_S(ArmEmitterContext context) 3838 { 3839 if (Optimizations.UseAdvSimd) 3840 { 3841 InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS); 3842 } 3843 else 3844 { 3845 EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); 3846 } 3847 } 3848 3849 public static void Sub_V(ArmEmitterContext context) 3850 { 3851 if (Optimizations.UseAdvSimd) 3852 { 3853 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV); 3854 } 3855 else if (Optimizations.UseSse2) 3856 { 3857 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3858 3859 Operand n = GetVec(op.Rn); 3860 Operand m = GetVec(op.Rm); 3861 3862 Intrinsic subInst = X86PsubInstruction[op.Size]; 3863 3864 Operand res = context.AddIntrinsic(subInst, n, m); 3865 3866 if (op.RegisterSize == RegisterSize.Simd64) 3867 { 3868 res = context.VectorZeroUpper64(res); 3869 } 3870 3871 context.Copy(GetVec(op.Rd), res); 3872 } 3873 else 3874 { 3875 EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); 3876 } 3877 } 3878 3879 public static void Subhn_V(ArmEmitterContext context) 3880 { 3881 if (Optimizations.UseAdvSimd) 3882 { 3883 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV); 3884 } 3885 else 3886 { 3887 EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); 3888 } 3889 } 3890 3891 public static void Suqadd_S(ArmEmitterContext context) 3892 { 3893 if (Optimizations.UseAdvSimd) 3894 { 3895 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS); 3896 } 3897 else 3898 { 3899 EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); 3900 } 3901 } 3902 3903 public static void Suqadd_V(ArmEmitterContext context) 3904 { 3905 if (Optimizations.UseAdvSimd) 3906 { 3907 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV); 3908 } 3909 else 3910 { 3911 EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); 3912 } 3913 } 3914 3915 public static void Uaba_V(ArmEmitterContext context) 3916 { 3917 if (Optimizations.UseAdvSimd) 3918 { 3919 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV); 3920 } 3921 else 3922 { 3923 EmitVectorTernaryOpZx(context, (op1, op2, op3) => 3924 { 3925 return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); 3926 }); 3927 } 3928 } 3929 3930 public static void Uabal_V(ArmEmitterContext context) 3931 { 3932 if (Optimizations.UseAdvSimd) 3933 { 3934 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV); 3935 } 3936 else 3937 { 3938 EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => 3939 { 3940 return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); 3941 }); 3942 } 3943 } 3944 3945 public static void Uabd_V(ArmEmitterContext context) 3946 { 3947 if (Optimizations.UseAdvSimd) 3948 { 3949 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV); 3950 } 3951 else if (Optimizations.UseSse41) 3952 { 3953 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3954 3955 Operand n = GetVec(op.Rn); 3956 Operand m = GetVec(op.Rm); 3957 3958 EmitSse41VectorUabdOp(context, op, n, m, isLong: false); 3959 } 3960 else 3961 { 3962 EmitVectorBinaryOpZx(context, (op1, op2) => 3963 { 3964 return EmitAbs(context, context.Subtract(op1, op2)); 3965 }); 3966 } 3967 } 3968 3969 public static void Uabdl_V(ArmEmitterContext context) 3970 { 3971 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 3972 3973 if (Optimizations.UseAdvSimd) 3974 { 3975 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV); 3976 } 3977 else if (Optimizations.UseSse41 && op.Size < 2) 3978 { 3979 Operand n = GetVec(op.Rn); 3980 Operand m = GetVec(op.Rm); 3981 3982 if (op.RegisterSize == RegisterSize.Simd128) 3983 { 3984 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 3985 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 3986 } 3987 3988 Intrinsic movInst = op.Size == 0 3989 ? Intrinsic.X86Pmovzxbw 3990 : Intrinsic.X86Pmovzxwd; 3991 3992 n = context.AddIntrinsic(movInst, n); 3993 m = context.AddIntrinsic(movInst, m); 3994 3995 EmitSse41VectorUabdOp(context, op, n, m, isLong: true); 3996 } 3997 else 3998 { 3999 EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => 4000 { 4001 return EmitAbs(context, context.Subtract(op1, op2)); 4002 }); 4003 } 4004 } 4005 4006 public static void Uadalp_V(ArmEmitterContext context) 4007 { 4008 if (Optimizations.UseAdvSimd) 4009 { 4010 InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV); 4011 } 4012 else 4013 { 4014 EmitAddLongPairwise(context, signed: false, accumulate: true); 4015 } 4016 } 4017 4018 public static void Uaddl_V(ArmEmitterContext context) 4019 { 4020 if (Optimizations.UseAdvSimd) 4021 { 4022 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV); 4023 } 4024 else if (Optimizations.UseSse41) 4025 { 4026 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4027 4028 Operand n = GetVec(op.Rn); 4029 Operand m = GetVec(op.Rm); 4030 4031 if (op.RegisterSize == RegisterSize.Simd128) 4032 { 4033 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 4034 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4035 } 4036 4037 Intrinsic movInst = X86PmovzxInstruction[op.Size]; 4038 4039 n = context.AddIntrinsic(movInst, n); 4040 m = context.AddIntrinsic(movInst, m); 4041 4042 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 4043 4044 context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); 4045 } 4046 else 4047 { 4048 EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); 4049 } 4050 } 4051 4052 public static void Uaddlp_V(ArmEmitterContext context) 4053 { 4054 if (Optimizations.UseAdvSimd) 4055 { 4056 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV); 4057 } 4058 else 4059 { 4060 EmitAddLongPairwise(context, signed: false, accumulate: false); 4061 } 4062 } 4063 4064 public static void Uaddlv_V(ArmEmitterContext context) 4065 { 4066 if (Optimizations.UseAdvSimd) 4067 { 4068 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV); 4069 } 4070 else 4071 { 4072 EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); 4073 } 4074 } 4075 4076 public static void Uaddw_V(ArmEmitterContext context) 4077 { 4078 if (Optimizations.UseAdvSimd) 4079 { 4080 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV); 4081 } 4082 else if (Optimizations.UseSse41) 4083 { 4084 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4085 4086 Operand n = GetVec(op.Rn); 4087 Operand m = GetVec(op.Rm); 4088 4089 if (op.RegisterSize == RegisterSize.Simd128) 4090 { 4091 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4092 } 4093 4094 Intrinsic movInst = X86PmovzxInstruction[op.Size]; 4095 4096 m = context.AddIntrinsic(movInst, m); 4097 4098 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 4099 4100 context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); 4101 } 4102 else 4103 { 4104 EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); 4105 } 4106 } 4107 4108 public static void Uhadd_V(ArmEmitterContext context) 4109 { 4110 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4111 4112 if (Optimizations.UseAdvSimd) 4113 { 4114 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV); 4115 } 4116 else if (Optimizations.UseSse2 && op.Size > 0) 4117 { 4118 Operand n = GetVec(op.Rn); 4119 Operand m = GetVec(op.Rm); 4120 4121 Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); 4122 Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); 4123 4124 Intrinsic shiftInst = op.Size == 1 ? Intrinsic.X86Psrlw : Intrinsic.X86Psrld; 4125 4126 res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); 4127 4128 Intrinsic addInst = X86PaddInstruction[op.Size]; 4129 4130 res = context.AddIntrinsic(addInst, res, res2); 4131 4132 if (op.RegisterSize == RegisterSize.Simd64) 4133 { 4134 res = context.VectorZeroUpper64(res); 4135 } 4136 4137 context.Copy(GetVec(op.Rd), res); 4138 } 4139 else 4140 { 4141 EmitVectorBinaryOpZx(context, (op1, op2) => 4142 { 4143 return context.ShiftRightUI(context.Add(op1, op2), Const(1)); 4144 }); 4145 } 4146 } 4147 4148 public static void Uhsub_V(ArmEmitterContext context) 4149 { 4150 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4151 4152 if (Optimizations.UseAdvSimd) 4153 { 4154 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV); 4155 } 4156 else if (Optimizations.UseSse2 && op.Size < 2) 4157 { 4158 Operand n = GetVec(op.Rn); 4159 Operand m = GetVec(op.Rm); 4160 4161 Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; 4162 4163 Operand res = context.AddIntrinsic(avgInst, n, m); 4164 4165 Intrinsic subInst = X86PsubInstruction[op.Size]; 4166 4167 res = context.AddIntrinsic(subInst, n, res); 4168 4169 if (op.RegisterSize == RegisterSize.Simd64) 4170 { 4171 res = context.VectorZeroUpper64(res); 4172 } 4173 4174 context.Copy(GetVec(op.Rd), res); 4175 } 4176 else 4177 { 4178 EmitVectorBinaryOpZx(context, (op1, op2) => 4179 { 4180 return context.ShiftRightUI(context.Subtract(op1, op2), Const(1)); 4181 }); 4182 } 4183 } 4184 4185 public static void Umax_V(ArmEmitterContext context) 4186 { 4187 if (Optimizations.UseAdvSimd) 4188 { 4189 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV); 4190 } 4191 else if (Optimizations.UseSse41) 4192 { 4193 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4194 4195 Operand n = GetVec(op.Rn); 4196 Operand m = GetVec(op.Rm); 4197 4198 Intrinsic maxInst = X86PmaxuInstruction[op.Size]; 4199 4200 Operand res = context.AddIntrinsic(maxInst, n, m); 4201 4202 if (op.RegisterSize == RegisterSize.Simd64) 4203 { 4204 res = context.VectorZeroUpper64(res); 4205 } 4206 4207 context.Copy(GetVec(op.Rd), res); 4208 } 4209 else 4210 { 4211 EmitVectorBinaryOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); 4212 } 4213 } 4214 4215 public static void Umaxp_V(ArmEmitterContext context) 4216 { 4217 if (Optimizations.UseAdvSimd) 4218 { 4219 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV); 4220 } 4221 else if (Optimizations.UseSsse3) 4222 { 4223 EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction); 4224 } 4225 else 4226 { 4227 EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); 4228 } 4229 } 4230 4231 public static void Umaxv_V(ArmEmitterContext context) 4232 { 4233 if (Optimizations.UseAdvSimd) 4234 { 4235 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV); 4236 } 4237 else 4238 { 4239 EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); 4240 } 4241 } 4242 4243 public static void Umin_V(ArmEmitterContext context) 4244 { 4245 if (Optimizations.UseAdvSimd) 4246 { 4247 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV); 4248 } 4249 else if (Optimizations.UseSse41) 4250 { 4251 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4252 4253 Operand n = GetVec(op.Rn); 4254 Operand m = GetVec(op.Rm); 4255 4256 Intrinsic minInst = X86PminuInstruction[op.Size]; 4257 4258 Operand res = context.AddIntrinsic(minInst, n, m); 4259 4260 if (op.RegisterSize == RegisterSize.Simd64) 4261 { 4262 res = context.VectorZeroUpper64(res); 4263 } 4264 4265 context.Copy(GetVec(op.Rd), res); 4266 } 4267 else 4268 { 4269 EmitVectorBinaryOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); 4270 } 4271 } 4272 4273 public static void Uminp_V(ArmEmitterContext context) 4274 { 4275 if (Optimizations.UseAdvSimd) 4276 { 4277 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV); 4278 } 4279 else if (Optimizations.UseSsse3) 4280 { 4281 EmitSsse3VectorPairwiseOp(context, X86PminuInstruction); 4282 } 4283 else 4284 { 4285 EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); 4286 } 4287 } 4288 4289 public static void Uminv_V(ArmEmitterContext context) 4290 { 4291 if (Optimizations.UseAdvSimd) 4292 { 4293 InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV); 4294 } 4295 else 4296 { 4297 EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); 4298 } 4299 } 4300 4301 public static void Umlal_V(ArmEmitterContext context) 4302 { 4303 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4304 4305 if (Optimizations.UseAdvSimd) 4306 { 4307 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV); 4308 } 4309 else if (Optimizations.UseSse41 && op.Size < 2) 4310 { 4311 Operand d = GetVec(op.Rd); 4312 Operand n = GetVec(op.Rn); 4313 Operand m = GetVec(op.Rm); 4314 4315 if (op.RegisterSize == RegisterSize.Simd128) 4316 { 4317 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 4318 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4319 } 4320 4321 Intrinsic movInst = X86PmovzxInstruction[op.Size]; 4322 4323 n = context.AddIntrinsic(movInst, n); 4324 m = context.AddIntrinsic(movInst, m); 4325 4326 Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; 4327 4328 Operand res = context.AddIntrinsic(mullInst, n, m); 4329 4330 Intrinsic addInst = X86PaddInstruction[op.Size + 1]; 4331 4332 context.Copy(d, context.AddIntrinsic(addInst, d, res)); 4333 } 4334 else 4335 { 4336 EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => 4337 { 4338 return context.Add(op1, context.Multiply(op2, op3)); 4339 }); 4340 } 4341 } 4342 4343 public static void Umlal_Ve(ArmEmitterContext context) 4344 { 4345 if (Optimizations.UseAdvSimd) 4346 { 4347 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe); 4348 } 4349 else 4350 { 4351 EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => 4352 { 4353 return context.Add(op1, context.Multiply(op2, op3)); 4354 }); 4355 } 4356 } 4357 4358 public static void Umlsl_V(ArmEmitterContext context) 4359 { 4360 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4361 4362 if (Optimizations.UseAdvSimd) 4363 { 4364 InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV); 4365 } 4366 else if (Optimizations.UseSse41 && op.Size < 2) 4367 { 4368 Operand d = GetVec(op.Rd); 4369 Operand n = GetVec(op.Rn); 4370 Operand m = GetVec(op.Rm); 4371 4372 if (op.RegisterSize == RegisterSize.Simd128) 4373 { 4374 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 4375 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4376 } 4377 4378 Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd; 4379 4380 n = context.AddIntrinsic(movInst, n); 4381 m = context.AddIntrinsic(movInst, m); 4382 4383 Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; 4384 4385 Operand res = context.AddIntrinsic(mullInst, n, m); 4386 4387 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 4388 4389 context.Copy(d, context.AddIntrinsic(subInst, d, res)); 4390 } 4391 else 4392 { 4393 EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => 4394 { 4395 return context.Subtract(op1, context.Multiply(op2, op3)); 4396 }); 4397 } 4398 } 4399 4400 public static void Umlsl_Ve(ArmEmitterContext context) 4401 { 4402 if (Optimizations.UseAdvSimd) 4403 { 4404 InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe); 4405 } 4406 else 4407 { 4408 EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => 4409 { 4410 return context.Subtract(op1, context.Multiply(op2, op3)); 4411 }); 4412 } 4413 } 4414 4415 public static void Umull_V(ArmEmitterContext context) 4416 { 4417 if (Optimizations.UseAdvSimd) 4418 { 4419 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV); 4420 } 4421 else 4422 { 4423 EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); 4424 } 4425 } 4426 4427 public static void Umull_Ve(ArmEmitterContext context) 4428 { 4429 if (Optimizations.UseAdvSimd) 4430 { 4431 InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe); 4432 } 4433 else 4434 { 4435 EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); 4436 } 4437 } 4438 4439 public static void Uqadd_S(ArmEmitterContext context) 4440 { 4441 if (Optimizations.UseAdvSimd) 4442 { 4443 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS); 4444 } 4445 else 4446 { 4447 EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); 4448 } 4449 } 4450 4451 public static void Uqadd_V(ArmEmitterContext context) 4452 { 4453 if (Optimizations.UseAdvSimd) 4454 { 4455 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV); 4456 } 4457 else 4458 { 4459 EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); 4460 } 4461 } 4462 4463 public static void Uqsub_S(ArmEmitterContext context) 4464 { 4465 if (Optimizations.UseAdvSimd) 4466 { 4467 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS); 4468 } 4469 else 4470 { 4471 EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); 4472 } 4473 } 4474 4475 public static void Uqsub_V(ArmEmitterContext context) 4476 { 4477 if (Optimizations.UseAdvSimd) 4478 { 4479 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV); 4480 } 4481 else 4482 { 4483 EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); 4484 } 4485 } 4486 4487 public static void Uqxtn_S(ArmEmitterContext context) 4488 { 4489 if (Optimizations.UseAdvSimd) 4490 { 4491 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS); 4492 } 4493 else 4494 { 4495 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); 4496 } 4497 } 4498 4499 public static void Uqxtn_V(ArmEmitterContext context) 4500 { 4501 if (Optimizations.UseAdvSimd) 4502 { 4503 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV); 4504 } 4505 else 4506 { 4507 EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); 4508 } 4509 } 4510 4511 public static void Urhadd_V(ArmEmitterContext context) 4512 { 4513 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4514 4515 if (Optimizations.UseAdvSimd) 4516 { 4517 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV); 4518 } 4519 else if (Optimizations.UseSse2 && op.Size < 2) 4520 { 4521 Operand n = GetVec(op.Rn); 4522 Operand m = GetVec(op.Rm); 4523 4524 Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; 4525 4526 Operand res = context.AddIntrinsic(avgInst, n, m); 4527 4528 if (op.RegisterSize == RegisterSize.Simd64) 4529 { 4530 res = context.VectorZeroUpper64(res); 4531 } 4532 4533 context.Copy(GetVec(op.Rd), res); 4534 } 4535 else 4536 { 4537 EmitVectorBinaryOpZx(context, (op1, op2) => 4538 { 4539 Operand res = context.Add(op1, op2); 4540 4541 res = context.Add(res, Const(1L)); 4542 4543 return context.ShiftRightUI(res, Const(1)); 4544 }); 4545 } 4546 } 4547 4548 public static void Usqadd_S(ArmEmitterContext context) 4549 { 4550 if (Optimizations.UseAdvSimd) 4551 { 4552 InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS); 4553 } 4554 else 4555 { 4556 EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); 4557 } 4558 } 4559 4560 public static void Usqadd_V(ArmEmitterContext context) 4561 { 4562 if (Optimizations.UseAdvSimd) 4563 { 4564 InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV); 4565 } 4566 else 4567 { 4568 EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); 4569 } 4570 } 4571 4572 public static void Usubl_V(ArmEmitterContext context) 4573 { 4574 if (Optimizations.UseAdvSimd) 4575 { 4576 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV); 4577 } 4578 else if (Optimizations.UseSse41) 4579 { 4580 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4581 4582 Operand n = GetVec(op.Rn); 4583 Operand m = GetVec(op.Rm); 4584 4585 if (op.RegisterSize == RegisterSize.Simd128) 4586 { 4587 n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); 4588 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4589 } 4590 4591 Intrinsic movInst = X86PmovzxInstruction[op.Size]; 4592 4593 n = context.AddIntrinsic(movInst, n); 4594 m = context.AddIntrinsic(movInst, m); 4595 4596 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 4597 4598 context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); 4599 } 4600 else 4601 { 4602 EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); 4603 } 4604 } 4605 4606 public static void Usubw_V(ArmEmitterContext context) 4607 { 4608 if (Optimizations.UseAdvSimd) 4609 { 4610 InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV); 4611 } 4612 else if (Optimizations.UseSse41) 4613 { 4614 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4615 4616 Operand n = GetVec(op.Rn); 4617 Operand m = GetVec(op.Rm); 4618 4619 if (op.RegisterSize == RegisterSize.Simd128) 4620 { 4621 m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); 4622 } 4623 4624 Intrinsic movInst = X86PmovzxInstruction[op.Size]; 4625 4626 m = context.AddIntrinsic(movInst, m); 4627 4628 Intrinsic subInst = X86PsubInstruction[op.Size + 1]; 4629 4630 context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); 4631 } 4632 else 4633 { 4634 EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); 4635 } 4636 } 4637 4638 private static Operand EmitAbs(ArmEmitterContext context, Operand value) 4639 { 4640 Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); 4641 4642 return context.ConditionalSelect(isPositive, value, context.Negate(value)); 4643 } 4644 4645 private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate) 4646 { 4647 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 4648 4649 Operand res = context.VectorZero(); 4650 4651 int pairs = op.GetPairsCount() >> op.Size; 4652 4653 for (int index = 0; index < pairs; index++) 4654 { 4655 int pairIndex = index << 1; 4656 4657 Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); 4658 Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); 4659 4660 Operand e = context.Add(ne0, ne1); 4661 4662 if (accumulate) 4663 { 4664 Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); 4665 4666 e = context.Add(e, de); 4667 } 4668 4669 res = EmitVectorInsert(context, res, e, index, op.Size + 1); 4670 } 4671 4672 context.Copy(GetVec(op.Rd), res); 4673 } 4674 4675 private static Operand EmitDoublingMultiplyHighHalf( 4676 ArmEmitterContext context, 4677 Operand n, 4678 Operand m, 4679 bool round) 4680 { 4681 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4682 4683 int eSize = 8 << op.Size; 4684 4685 Operand res = context.Multiply(n, m); 4686 4687 if (!round) 4688 { 4689 res = context.ShiftRightSI(res, Const(eSize - 1)); 4690 } 4691 else 4692 { 4693 long roundConst = 1L << (eSize - 1); 4694 4695 res = context.ShiftLeft(res, Const(1)); 4696 4697 res = context.Add(res, Const(roundConst)); 4698 4699 res = context.ShiftRightSI(res, Const(eSize)); 4700 4701 Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue)); 4702 4703 res = context.ConditionalSelect(isIntMin, context.Negate(res), res); 4704 } 4705 4706 return res; 4707 } 4708 4709 private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round) 4710 { 4711 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 4712 4713 int elems = 8 >> op.Size; 4714 int eSize = 8 << op.Size; 4715 4716 int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; 4717 4718 Operand d = GetVec(op.Rd); 4719 4720 Operand res = part == 0 ? context.VectorZero() : context.Copy(d); 4721 4722 long roundConst = 1L << (eSize - 1); 4723 4724 for (int index = 0; index < elems; index++) 4725 { 4726 Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); 4727 Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); 4728 4729 Operand de = emit(ne, me); 4730 4731 if (round) 4732 { 4733 de = context.Add(de, Const(roundConst)); 4734 } 4735 4736 de = context.ShiftRightUI(de, Const(eSize)); 4737 4738 res = EmitVectorInsert(context, res, de, part + index, op.Size); 4739 } 4740 4741 context.Copy(d, res); 4742 } 4743 4744 private static Operand EmitMax64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed) 4745 { 4746 Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); 4747 4748 Operand cmp = signed 4749 ? context.ICompareGreaterOrEqual(op1, op2) 4750 : context.ICompareGreaterOrEqualUI(op1, op2); 4751 4752 return context.ConditionalSelect(cmp, op1, op2); 4753 } 4754 4755 private static Operand EmitMin64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed) 4756 { 4757 Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); 4758 4759 Operand cmp = signed 4760 ? context.ICompareLessOrEqual(op1, op2) 4761 : context.ICompareLessOrEqualUI(op1, op2); 4762 4763 return context.ConditionalSelect(cmp, op1, op2); 4764 } 4765 4766 private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) 4767 { 4768 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 4769 4770 Operand n = GetVec(op.Rn); 4771 4772 Operand res; 4773 4774 if (roundMode != FPRoundingMode.ToNearestAway) 4775 { 4776 Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; 4777 4778 res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); 4779 } 4780 else 4781 { 4782 res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true); 4783 } 4784 4785 if ((op.Size & 1) != 0) 4786 { 4787 res = context.VectorZeroUpper64(res); 4788 } 4789 else 4790 { 4791 res = context.VectorZeroUpper96(res); 4792 } 4793 4794 context.Copy(GetVec(op.Rd), res); 4795 } 4796 4797 private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) 4798 { 4799 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 4800 4801 Operand n = GetVec(op.Rn); 4802 4803 Operand res; 4804 4805 if (roundMode != FPRoundingMode.ToNearestAway) 4806 { 4807 Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundpd : Intrinsic.X86Roundps; 4808 4809 res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); 4810 } 4811 else 4812 { 4813 res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false); 4814 } 4815 4816 if (op.RegisterSize == RegisterSize.Simd64) 4817 { 4818 res = context.VectorZeroUpper64(res); 4819 } 4820 4821 context.Copy(GetVec(op.Rd), res); 4822 } 4823 4824 private static Operand EmitSse41Round32Exp8OpF(ArmEmitterContext context, Operand value, bool scalar) 4825 { 4826 Operand roundMask; 4827 Operand truncMask; 4828 Operand expMask; 4829 4830 if (scalar) 4831 { 4832 roundMask = X86GetScalar(context, 0x4000); 4833 truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000)); 4834 expMask = X86GetScalar(context, 0x7F800000); 4835 } 4836 else 4837 { 4838 roundMask = X86GetAllElements(context, 0x4000); 4839 truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000)); 4840 expMask = X86GetAllElements(context, 0x7F800000); 4841 } 4842 4843 Operand oValue = value; 4844 Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask); 4845 Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask); 4846 4847 value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask); 4848 value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask); 4849 4850 return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf); 4851 } 4852 4853 private static Operand EmitSse41RecipStepSelectOpF( 4854 ArmEmitterContext context, 4855 Operand n, 4856 Operand m, 4857 Operand res, 4858 Operand mask, 4859 bool scalar, 4860 int sizeF) 4861 { 4862 Intrinsic cmpOp; 4863 Intrinsic shlOp; 4864 Intrinsic blendOp; 4865 Operand zero = context.VectorZero(); 4866 Operand expMask; 4867 4868 if (sizeF == 0) 4869 { 4870 cmpOp = Intrinsic.X86Pcmpeqd; 4871 shlOp = Intrinsic.X86Pslld; 4872 blendOp = Intrinsic.X86Blendvps; 4873 expMask = scalar ? X86GetScalar(context, 0x7F800000 << 1) : X86GetAllElements(context, 0x7F800000 << 1); 4874 } 4875 else /* if (sizeF == 1) */ 4876 { 4877 cmpOp = Intrinsic.X86Pcmpeqq; 4878 shlOp = Intrinsic.X86Psllq; 4879 blendOp = Intrinsic.X86Blendvpd; 4880 expMask = scalar ? X86GetScalar(context, 0x7FF0000000000000L << 1) : X86GetAllElements(context, 0x7FF0000000000000L << 1); 4881 } 4882 4883 n = context.AddIntrinsic(shlOp, n, Const(1)); 4884 m = context.AddIntrinsic(shlOp, m, Const(1)); 4885 4886 Operand nZero = context.AddIntrinsic(cmpOp, n, zero); 4887 Operand mZero = context.AddIntrinsic(cmpOp, m, zero); 4888 Operand nInf = context.AddIntrinsic(cmpOp, n, expMask); 4889 Operand mInf = context.AddIntrinsic(cmpOp, m, expMask); 4890 4891 Operand nmZero = context.AddIntrinsic(Intrinsic.X86Por, nZero, mZero); 4892 Operand nmInf = context.AddIntrinsic(Intrinsic.X86Por, nInf, mInf); 4893 Operand nmZeroInf = context.AddIntrinsic(Intrinsic.X86Pand, nmZero, nmInf); 4894 4895 return context.AddIntrinsic(blendOp, res, mask, nmZeroInf); 4896 } 4897 4898 public static void EmitSse2VectorIsNaNOpF( 4899 ArmEmitterContext context, 4900 Operand opF, 4901 out Operand qNaNMask, 4902 out Operand sNaNMask, 4903 bool? isQNaN = null) 4904 { 4905 IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; 4906 4907 if ((op.Size & 1) == 0) 4908 { 4909 const int QBit = 22; 4910 4911 Operand qMask = X86GetAllElements(context, 1 << QBit); 4912 4913 Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ)); 4914 4915 Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); 4916 mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal)); 4917 4918 qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default; 4919 sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default; 4920 } 4921 else /* if ((op.Size & 1) == 1) */ 4922 { 4923 const int QBit = 51; 4924 4925 Operand qMask = X86GetAllElements(context, 1L << QBit); 4926 4927 Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ)); 4928 4929 Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); 4930 mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal)); 4931 4932 qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default; 4933 sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default; 4934 } 4935 } 4936 4937 public static Operand EmitSse41ProcessNaNsOpF( 4938 ArmEmitterContext context, 4939 Func2I emit, 4940 bool scalar, 4941 Operand n = default, 4942 Operand m = default) 4943 { 4944 Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n; 4945 Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m; 4946 4947 EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask); 4948 EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false); 4949 4950 int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; 4951 4952 if (sizeF == 0) 4953 { 4954 const int QBit = 22; 4955 4956 Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit); 4957 4958 Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); 4959 resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); 4960 4961 Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask); 4962 resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); 4963 4964 Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); 4965 4966 Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask); 4967 4968 if (n != default || m != default) 4969 { 4970 return res; 4971 } 4972 4973 if (scalar) 4974 { 4975 res = context.VectorZeroUpper96(res); 4976 } 4977 else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) 4978 { 4979 res = context.VectorZeroUpper64(res); 4980 } 4981 4982 context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); 4983 4984 return default; 4985 } 4986 else /* if (sizeF == 1) */ 4987 { 4988 const int QBit = 51; 4989 4990 Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit); 4991 4992 Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); 4993 resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); 4994 4995 Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask); 4996 resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); 4997 4998 Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); 4999 5000 Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask); 5001 5002 if (n != default || m != default) 5003 { 5004 return res; 5005 } 5006 5007 if (scalar) 5008 { 5009 res = context.VectorZeroUpper64(res); 5010 } 5011 5012 context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); 5013 5014 return default; 5015 } 5016 } 5017 5018 private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax) 5019 { 5020 IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; 5021 5022 if ((op.Size & 1) == 0) 5023 { 5024 Operand mask = X86GetAllElements(context, -0f); 5025 5026 Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m); 5027 res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); 5028 5029 Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); 5030 resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign); 5031 5032 return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); 5033 } 5034 else /* if ((op.Size & 1) == 1) */ 5035 { 5036 Operand mask = X86GetAllElements(context, -0d); 5037 5038 Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m); 5039 res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); 5040 5041 Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); 5042 resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign); 5043 5044 return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); 5045 } 5046 } 5047 5048 private static Operand EmitSse41MaxMinNumOpF( 5049 ArmEmitterContext context, 5050 bool isMaxNum, 5051 bool scalar, 5052 Operand n = default, 5053 Operand m = default) 5054 { 5055 Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n; 5056 Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m; 5057 5058 EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true); 5059 EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true); 5060 5061 int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; 5062 5063 if (sizeF == 0) 5064 { 5065 Operand negInfMask = scalar 5066 ? X86GetScalar(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity) 5067 : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); 5068 5069 Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); 5070 Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask); 5071 5072 nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask); 5073 mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask); 5074 5075 Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => 5076 { 5077 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); 5078 }, scalar: scalar, nCopy, mCopy); 5079 5080 if (n != default || m != default) 5081 { 5082 return res; 5083 } 5084 5085 if (scalar) 5086 { 5087 res = context.VectorZeroUpper96(res); 5088 } 5089 else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) 5090 { 5091 res = context.VectorZeroUpper64(res); 5092 } 5093 5094 context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); 5095 5096 return default; 5097 } 5098 else /* if (sizeF == 1) */ 5099 { 5100 Operand negInfMask = scalar 5101 ? X86GetScalar(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity) 5102 : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity); 5103 5104 Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); 5105 Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask); 5106 5107 nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask); 5108 mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask); 5109 5110 Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => 5111 { 5112 return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); 5113 }, scalar: scalar, nCopy, mCopy); 5114 5115 if (n != default || m != default) 5116 { 5117 return res; 5118 } 5119 5120 if (scalar) 5121 { 5122 res = context.VectorZeroUpper64(res); 5123 } 5124 5125 context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); 5126 5127 return default; 5128 } 5129 } 5130 5131 private enum AddSub 5132 { 5133 None, 5134 Add, 5135 Subtract, 5136 } 5137 5138 private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub) 5139 { 5140 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 5141 5142 Operand n = GetVec(op.Rn); 5143 Operand m = GetVec(op.Rm); 5144 5145 Operand res; 5146 5147 if (op.Size == 0) 5148 { 5149 Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8)); 5150 Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8)); 5151 5152 res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8); 5153 5154 res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8)); 5155 5156 Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); 5157 5158 Operand mask = X86GetAllElements(context, 0x00FF00FF); 5159 5160 res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask); 5161 } 5162 else if (op.Size == 1) 5163 { 5164 res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); 5165 } 5166 else 5167 { 5168 res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m); 5169 } 5170 5171 Operand d = GetVec(op.Rd); 5172 5173 if (addSub == AddSub.Add) 5174 { 5175 Intrinsic addInst = X86PaddInstruction[op.Size]; 5176 5177 res = context.AddIntrinsic(addInst, d, res); 5178 } 5179 else if (addSub == AddSub.Subtract) 5180 { 5181 Intrinsic subInst = X86PsubInstruction[op.Size]; 5182 5183 res = context.AddIntrinsic(subInst, d, res); 5184 } 5185 5186 if (op.RegisterSize == RegisterSize.Simd64) 5187 { 5188 res = context.VectorZeroUpper64(res); 5189 } 5190 5191 context.Copy(d, res); 5192 } 5193 5194 private static void EmitSse41VectorSabdOp( 5195 ArmEmitterContext context, 5196 OpCodeSimdReg op, 5197 Operand n, 5198 Operand m, 5199 bool isLong) 5200 { 5201 int size = isLong ? op.Size + 1 : op.Size; 5202 5203 Intrinsic cmpgtInst = X86PcmpgtInstruction[size]; 5204 5205 Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); 5206 5207 Intrinsic subInst = X86PsubInstruction[size]; 5208 5209 Operand res = context.AddIntrinsic(subInst, n, m); 5210 5211 res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); 5212 5213 Operand res2 = context.AddIntrinsic(subInst, m, n); 5214 5215 res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); 5216 5217 res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); 5218 5219 if (!isLong && op.RegisterSize == RegisterSize.Simd64) 5220 { 5221 res = context.VectorZeroUpper64(res); 5222 } 5223 5224 context.Copy(GetVec(op.Rd), res); 5225 } 5226 5227 private static void EmitSse41VectorUabdOp( 5228 ArmEmitterContext context, 5229 OpCodeSimdReg op, 5230 Operand n, 5231 Operand m, 5232 bool isLong) 5233 { 5234 int size = isLong ? op.Size + 1 : op.Size; 5235 5236 Intrinsic maxInst = X86PmaxuInstruction[size]; 5237 5238 Operand max = context.AddIntrinsic(maxInst, m, n); 5239 5240 Intrinsic cmpeqInst = X86PcmpeqInstruction[size]; 5241 5242 Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); 5243 5244 Operand onesMask = X86GetAllElements(context, -1L); 5245 5246 cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask); 5247 5248 Intrinsic subInst = X86PsubInstruction[size]; 5249 5250 Operand res = context.AddIntrinsic(subInst, n, m); 5251 Operand res2 = context.AddIntrinsic(subInst, m, n); 5252 5253 res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); 5254 res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); 5255 5256 res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); 5257 5258 if (!isLong && op.RegisterSize == RegisterSize.Simd64) 5259 { 5260 res = context.VectorZeroUpper64(res); 5261 } 5262 5263 context.Copy(GetVec(op.Rd), res); 5264 } 5265 5266 private static Operand EmitSse2Sll_128(ArmEmitterContext context, Operand op, int shift) 5267 { 5268 // The upper part of op is assumed to be zero. 5269 Debug.Assert(shift >= 0 && shift < 64); 5270 5271 if (shift == 0) 5272 { 5273 return op; 5274 } 5275 5276 Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8)); 5277 high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift)); 5278 5279 Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift)); 5280 5281 return context.AddIntrinsic(Intrinsic.X86Por, high, low); 5282 } 5283 } 5284 }