InstEmitSimdHelper.cs
using ARMeilleure.CodeGen.X86;
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using System.Reflection;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    using Func1I = Func<Operand, Operand>;
    using Func2I = Func<Operand, Operand, Operand>;
    using Func3I = Func<Operand, Operand, Operand, Operand>;

    /// <summary>
    /// Shared helpers used while emitting SIMD and floating-point instructions: constant masks,
    /// x86 intrinsic lookup tables, host FP mode switching, and generic per-element emit loops.
    /// </summary>
    static class InstEmitSimdHelper
    {
        #region "Masks"
        /// <summary>
        /// Byte-index masks, one entry per element size (B, H, S). Each byte of an entry is a byte index:
        /// entry 0 lists the even-numbered bytes (0, 2, ..., 14), entry 1 the bytes of the even-numbered
        /// 16-bit elements, and entry 2 the bytes of the even-numbered 32-bit elements.
        /// </summary>
        /// <remarks>NOTE(review): presumably used as byte-shuffle control values; confirm at the call sites.</remarks>
        public static readonly long[] EvenMasks = new long[]
        {
            14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B
            13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H
            11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0, // S
        };

        /// <summary>
        /// Counterpart of <see cref="EvenMasks"/>: entry 0 lists the odd-numbered bytes (1, 3, ..., 15),
        /// entry 1 the bytes of the odd-numbered 16-bit elements, and entry 2 the bytes of the
        /// odd-numbered 32-bit elements.
        /// </summary>
        public static readonly long[] OddMasks = new long[]
        {
            15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B
            15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H
            15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0, // S
        };

        /// <summary>
        /// 64-bit constant in which every byte is 128 (0x80).
        /// </summary>
        /// <remarks>NOTE(review): presumably a shuffle control that zeroes every byte; confirm at the call sites.</remarks>
        public const long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0;

        /// <summary>
        /// Builds a 64-bit value whose eight bytes each have a single bit set (0x01 in the top byte down to
        /// 0x80 in the bottom byte) and shifts it by whole bytes: right by <paramref name="shift"/> bytes when
        /// <paramref name="shift"/> is non-negative, left by minus <paramref name="shift"/> bytes otherwise.
        /// </summary>
        /// <param name="shift">Number of byte rows to shift by; the sign selects the direction.</param>
        /// <returns>The shifted 64-bit pattern.</returns>
        /// <remarks>
        /// C# takes 64-bit shift counts modulo 64, so a magnitude of 8 or more does not yield zero.
        /// NOTE(review): callers presumably pass values between -7 and 7; confirm.
        /// </remarks>
        public static ulong X86GetGf2p8LogicalShiftLeft(int shift)
        {
            ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) |
                             (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0);

            return shift >= 0 ? identity >> (shift * 8) : identity << (-shift * 8);
        }
        #endregion

        #region "X86 SSE Intrinsics"
        // Lookup tables grouping the width variants of one x86 instruction family.
        // NOTE(review): presumably indexed by element size (0 = byte, 1 = word, 2 = dword, 3 = qword),
        // judging by the b/w/d/q suffix order; confirm at the call sites. Tables with three entries
        // have no entry for index 3.

        public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[]
        {
            Intrinsic.X86Paddb,
            Intrinsic.X86Paddw,
            Intrinsic.X86Paddd,
            Intrinsic.X86Paddq,
        };

        public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pcmpeqb,
            Intrinsic.X86Pcmpeqw,
            Intrinsic.X86Pcmpeqd,
            Intrinsic.X86Pcmpeqq,
        };

        public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pcmpgtb,
            Intrinsic.X86Pcmpgtw,
            Intrinsic.X86Pcmpgtd,
            Intrinsic.X86Pcmpgtq,
        };

        public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmaxsb,
            Intrinsic.X86Pmaxsw,
            Intrinsic.X86Pmaxsd,
        };

        public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmaxub,
            Intrinsic.X86Pmaxuw,
            Intrinsic.X86Pmaxud,
        };

        public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pminsb,
            Intrinsic.X86Pminsw,
            Intrinsic.X86Pminsd,
        };

        public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pminub,
            Intrinsic.X86Pminuw,
            Intrinsic.X86Pminud,
        };

        public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmovsxbw,
            Intrinsic.X86Pmovsxwd,
            Intrinsic.X86Pmovsxdq,
        };

        public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[]
        {
            Intrinsic.X86Pmovzxbw,
            Intrinsic.X86Pmovzxwd,
            Intrinsic.X86Pmovzxdq,
        };

        public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[]
        {
            0, // Placeholder: no byte-sized variant is listed for this family.
            Intrinsic.X86Psllw,
            Intrinsic.X86Pslld,
            Intrinsic.X86Psllq,
        };

        public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[]
        {
            0, // Placeholder: no byte-sized variant is listed for this family.
            Intrinsic.X86Psraw,
            Intrinsic.X86Psrad,
        };

        public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[]
        {
            0, // Placeholder: no byte-sized variant is listed for this family.
            Intrinsic.X86Psrlw,
            Intrinsic.X86Psrld,
            Intrinsic.X86Psrlq,
        };

        public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[]
        {
            Intrinsic.X86Psubb,
            Intrinsic.X86Psubw,
            Intrinsic.X86Psubd,
            Intrinsic.X86Psubq,
        };

        public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[]
        {
            Intrinsic.X86Punpckhbw,
            Intrinsic.X86Punpckhwd,
            Intrinsic.X86Punpckhdq,
            Intrinsic.X86Punpckhqdq,
        };

        public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[]
        {
            Intrinsic.X86Punpcklbw,
            Intrinsic.X86Punpcklwd,
            Intrinsic.X86Punpckldq,
            Intrinsic.X86Punpcklqdq,
        };
        #endregion

        /// <summary>
        /// Emits code that loads the guest flush-to-zero and rounding-mode flags into the host FP control
        /// register: MXCSR when <c>Optimizations.UseSse2</c> is set, otherwise FPCR when
        /// <c>Optimizations.UseAdvSimd</c> is set. Nothing is emitted when neither is set.
        /// </summary>
        /// <param name="context">Emitter that receives the generated operations.</param>
        /// <param name="getFpFlag">Callback returning the operand that holds the given guest FP state flag.</param>
        public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
        {
            if (Optimizations.UseSse2)
            {
                Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);

                Operand fzTrue = getFpFlag(FPState.FzFlag);
                Operand r0True = getFpFlag(FPState.RMode0Flag);
                Operand r1True = getFpFlag(FPState.RMode1Flag);

                // Start from a clean slate for the bits that are about to be set conditionally.
                mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));

                // With FZ set, Um and Dm are set together with Ftz and Daz.
                mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));

                // X86 round modes in order: nearest, negative, positive, zero
                // ARM round modes in order: nearest, positive, negative, zero
                // Read the bits backwards to correct this.

                mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
                mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));

                context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
            }
            else if (Optimizations.UseAdvSimd)
            {
                Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);

                Operand fzTrue = getFpFlag(FPState.FzFlag);
                Operand r0True = getFpFlag(FPState.RMode0Flag);
                Operand r1True = getFpFlag(FPState.RMode1Flag);

                // Clear the bits first, then set each one whose guest flag is true.
                fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));

                fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
                fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
                fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));

                context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);

                // TODO: Restore FPSR
            }
        }

        /// <summary>
        /// Emits code that clears the flush-to-zero and rounding-mode bits in the host FP control register
        /// (MXCSR with <c>Optimizations.UseSse2</c>, otherwise FPCR with <c>Optimizations.UseAdvSimd</c>).
        /// </summary>
        /// <param name="context">Emitter that receives the generated operations.</param>
        /// <param name="setFpFlag">Callback for writing guest FP state flags; not used by the current body.</param>
        public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
        {
            if (Optimizations.UseSse2)
            {
                Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);

                // Unset round mode (to nearest) and ftz.
                mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));

                context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);

                // Status flags would be stored here if they were used.
            }
            else if (Optimizations.UseAdvSimd)
            {
                Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);

                // Unset round mode (to nearest) and fz.
                fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));

                context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);

                // TODO: Store FPSR
            }
        }

        /// <summary>
        /// Returns <c>op.Imm</c> minus the element width in bits (8 shifted left by <c>op.Size</c>).
        /// </summary>
        public static int GetImmShl(OpCodeSimdShImm op)
        {
            return op.Imm - (8 << op.Size);
        }

        /// <summary>
        /// Returns twice the element width in bits (8 shifted left by <c>op.Size + 1</c>) minus <c>op.Imm</c>.
        /// </summary>
        public static int GetImmShr(OpCodeSimdShImm op)
        {
            return (8 << (op.Size + 1)) - op.Imm;
        }

        /// <summary>
        /// Creates a scalar vector from the raw 32-bit pattern of <paramref name="value"/>.
        /// </summary>
        public static Operand X86GetScalar(ArmEmitterContext context, float value)
        {
            return X86GetScalar(context, BitConverter.SingleToInt32Bits(value));
        }

        /// <summary>
        /// Creates a scalar vector from the raw 64-bit pattern of <paramref name="value"/>.
        /// </summary>
        public static Operand X86GetScalar(ArmEmitterContext context, double value)
        {
            return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value));
        }

        /// <summary>
        /// Creates a scalar vector from a 32-bit constant via <c>VectorCreateScalar</c>.
        /// </summary>
        public static Operand X86GetScalar(ArmEmitterContext context, int value)
        {
            return context.VectorCreateScalar(Const(value));
        }

        /// <summary>
        /// Creates a scalar vector from a 64-bit constant via <c>VectorCreateScalar</c>.
        /// </summary>
        public static Operand X86GetScalar(ArmEmitterContext context, long value)
        {
            return context.VectorCreateScalar(Const(value));
        }

        /// <summary>
        /// Forwards the raw 32-bit pattern of <paramref name="value"/> to the <see cref="int"/> overload.
        /// </summary>
        public static Operand X86GetAllElements(ArmEmitterContext context, float value)
        {
            return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value));
        }

        /// <summary>
        /// Forwards the raw 64-bit pattern of <paramref name="value"/> to the <see cref="long"/> overload.
        /// </summary>
        public static Operand X86GetAllElements(ArmEmitterContext context, double value)
        {
            return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value));
        }

        /// <summary>
        /// Replicates the 16-bit <paramref name="value"/> four times into a 64-bit pattern and forwards
        /// it to the <see cref="long"/> overload.
        /// </summary>
        public static Operand X86GetAllElements(ArmEmitterContext context, short value)
        {
            // Going through ushort keeps a negative value from sign-extending into the upper bits.
            ulong value1 = (ushort)value;
            ulong value2 = value1 << 16 | value1;
            ulong value4 = value2 << 32 | value2;

            return X86GetAllElements(context, (long)value4);
        }

        /// <summary>
        /// Creates a scalar vector from <paramref name="value"/> and applies Shufps with control 0,
        /// using that vector as both sources.
        /// </summary>
        public static Operand X86GetAllElements(ArmEmitterContext context, int value)
        {
            Operand vector = context.VectorCreateScalar(Const(value));

            vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0));

            return vector;
        }

        /// <summary>
        /// Creates a scalar vector from <paramref name="value"/> and applies Movlhps,
        /// using that vector as both sources.
        /// </summary>
        public static Operand X86GetAllElements(ArmEmitterContext context, long value)
        {
            Operand vector = context.VectorCreateScalar(Const(value));

            vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector);

            return vector;
        }

        /// <summary>
        /// Signed convenience overload; reinterprets both values as unsigned and forwards them.
        /// </summary>
        public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0)
        {
            return X86GetElements(context, (ulong)e1, (ulong)e0);
        }

        /// <summary>
        /// Creates one scalar vector per argument and combines them with Punpcklqdq,
        /// passing the <paramref name="e0"/> vector first and the <paramref name="e1"/> vector second.
        /// </summary>
        public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0)
        {
            Operand vector0 = context.VectorCreateScalar(Const(e0));
            Operand vector1 = context.VectorCreateScalar(Const(e1));

            return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1);
        }

        /// <summary>
        /// Maps a rounding mode to an integer round-control value; every result has bit 3 (value 8) set.
        /// </summary>
        /// <param name="roundMode">Rounding mode to translate.</param>
        /// <returns>8 OR-ed with 0, 2, 1 or 3 for nearest, plus infinity, minus infinity and zero respectively.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="roundMode"/> is not one of the four listed modes.</exception>
        public static int X86GetRoundControl(FPRoundingMode roundMode)
        {
            return roundMode switch
            {
#pragma warning disable IDE0055 // Disable formatting
                FPRoundingMode.ToNearest            => 8 | 0, // even
                FPRoundingMode.TowardsPlusInfinity  => 8 | 2,
                FPRoundingMode.TowardsMinusInfinity => 8 | 1,
                FPRoundingMode.TowardsZero          => 8 | 3,
                _ => throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."),
#pragma warning restore IDE0055
            };
        }

        /// <summary>
        /// Emits a round-to-nearest-with-ties-away sequence: a constant just below 0.5 carrying the sign of
        /// each input is added to the input, and the sum is then rounded using the TowardsZero round control.
        /// </summary>
        /// <param name="context">Emitter that receives the generated operations.</param>
        /// <param name="n">Input vector; asserted to be of type V128.</param>
        /// <param name="scalar">Selects the scalar (ss/sd) intrinsics instead of the packed (ps/pd) ones.</param>
        /// <returns>The operand holding the rounded result.</returns>
        public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar)
        {
            Debug.Assert(n.Type == OperandType.V128);

            Operand nCopy = context.Copy(n);

            Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero));

            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            // Bit 0 of Size clear selects the single-precision path, set selects double precision.
            if ((op.Size & 1) == 0)
            {
                // Isolate the sign bit of the input.
                Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue);
                signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);

                // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1
                Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF);
                valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC);
            }
            else
            {
                // Isolate the sign bit of the input.
                Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue);
                signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy);

                // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L
                Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL);
                valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask);

                nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC);
            }

            return nCopy;
        }

        /// <summary>
        /// Emits a branch-free bit-count sequence for an 8-bit quantity using the masks 0x55, 0x33 and 0x0f.
        /// </summary>
        /// <param name="context">Emitter that receives the generated operations.</param>
        /// <param name="op">Input operand; asserted to be of type I32 or I64.</param>
        /// <returns>The operand holding the count.</returns>
        /// <remarks>NOTE(review): assumes bits above the low 8 of <paramref name="op"/> are zero; confirm at the call sites.</remarks>
        public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.).
        {
            Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64);

            // Pairwise counts: subtract the odd bits (shifted down) from each 2-bit group.
            Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L)));

            // Sum adjacent 2-bit counts into 4-bit groups.
            Operand c1 = Const(op.Type, 0x33L);
            Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1));

            // Sum the two 4-bit groups and keep the low nibble.
            return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL));
        }

        /// <summary>
        /// Applies a one-operand intrinsic to Rn (<paramref name="inst64"/> when bit 0 of the opcode size is
        /// set, otherwise <paramref name="inst32"/>), clears the upper part of the result with
        /// <c>VectorZeroUpper64</c> or <c>VectorZeroUpper96</c> respectively, and writes it to Rd.
        /// </summary>
        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n);

            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Two-operand (Rn, Rm) counterpart of the scalar unary intrinsic helper: same intrinsic selection
        /// and the same upper-part clearing before the result is written to Rd.
        /// </summary>
        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n, m);

            if ((op.Size & 1) != 0)
            {
                res = context.VectorZeroUpper64(res);
            }
            else
            {
                res = context.VectorZeroUpper96(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies a one-operand intrinsic to the whole Rn vector (<paramref name="inst64"/> when bit 0 of the
        /// opcode size is set, otherwise <paramref name="inst32"/>) and writes the result to Rd. For
        /// <c>RegisterSize.Simd64</c> the result goes through <c>VectorZeroUpper64</c> first.
        /// </summary>
        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Two-operand (Rn, Rm) counterpart of the vector unary intrinsic helper, with the same intrinsic
        /// selection and the same <c>RegisterSize.Simd64</c> handling.
        /// </summary>
        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = GetVec(op.Rn);
            Operand m = GetVec(op.Rm);

            Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32;

            Operand res = context.AddIntrinsic(inst, n, m);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Emits a call to the one-argument method called <paramref name="name"/> on <see cref="MathF"/>
        /// (bit 0 of the opcode size clear) or on <see cref="Math"/> (bit set), resolved through reflection.
        /// </summary>
        /// <returns>The operand holding the call result.</returns>
        public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
                : typeof(Math).GetMethod(name, new Type[] { typeof(double) });

            return context.Call(info, n);
        }

        /// <summary>
        /// Emits a call to <c>MathF.Round(float, MidpointRounding)</c> or
        /// <c>Math.Round(double, MidpointRounding)</c>, chosen by bit 0 of the opcode size, passing
        /// <paramref name="roundMode"/> as an integer constant.
        /// </summary>
        /// <returns>The operand holding the call result.</returns>
        public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            string name = nameof(Math.Round);

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) })
                : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) });

            return context.Call(info, n, Const((int)roundMode));
        }

        /// <summary>
        /// Emits code that combines the two rounding-mode flags into one value:
        /// RMode1Flag shifted left by one, OR-ed with RMode0Flag.
        /// </summary>
        public static Operand EmitGetRoundingMode(ArmEmitterContext context)
        {
            Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1));
            rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag));

            return rMode;
        }

        /// <summary>
        /// Emits a run-time dispatch on the current rounding mode: ToNearest calls Round with
        /// <c>MidpointRounding.ToEven</c>, TowardsPlusInfinity calls Ceiling, TowardsMinusInfinity calls
        /// Floor, and any other value calls Truncate.
        /// </summary>
        /// <param name="context">Emitter that receives the generated operations.</param>
        /// <param name="op">Value to round; asserted to be of type FP32 or FP64.</param>
        /// <returns>A local operand of the same type that receives the result on every path.</returns>
        public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64);

            Operand lbl1 = Label();
            Operand lbl2 = Label();
            Operand lbl3 = Label();
            Operand lblEnd = Label();

            Operand rN = Const((int)FPRoundingMode.ToNearest);
            Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity);
            Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity);

            Operand res = context.AllocateLocal(op.Type);

            Operand rMode = EmitGetRoundingMode(context);

            // Each test skips to the next candidate when the mode does not match.
            context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual);
            context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op));
            context.Branch(lblEnd);

            context.MarkLabel(lbl1);
            context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op));
            context.Branch(lblEnd);

            context.MarkLabel(lbl2);
            context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op));
            context.Branch(lblEnd);

            // Fallthrough case: none of the three modes above matched.
            context.MarkLabel(lbl3);
            context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op));
            context.Branch(lblEnd);

            context.MarkLabel(lblEnd);

            return res;
        }

        /// <summary>
        /// Emits a call to the method called <paramref name="name"/> on <c>SoftFloat32</c> (bit 0 of the
        /// opcode size clear) or <c>SoftFloat64</c> (bit set). The call is bracketed by
        /// <c>ExitArmFpMode</c> and <c>StoreToContext</c> before it, and by <c>LoadFromContext</c> and
        /// <c>EnterArmFpMode</c> after it.
        /// </summary>
        /// <returns>The operand holding the call result.</returns>
        public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs)
        {
            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;

            MethodInfo info = (op.Size & 1) == 0
                ? typeof(SoftFloat32).GetMethod(name)
                : typeof(SoftFloat64).GetMethod(name);

            context.ExitArmFpMode();
            context.StoreToContext();
            Operand res = context.Call(info, callArgs);
            context.LoadFromContext();
            context.EnterArmFpMode();

            return res;
        }

        /// <summary>
        /// Extracts element 0 of Rn and element <c>op.Index</c> of Rm as FP32 or FP64 (chosen by bit 0 of
        /// the opcode size), and writes <paramref name="emit"/>'s result into element 0 of an otherwise
        /// zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
        }

        /// <summary>
        /// Same as the scalar by-element binary helper, but element 0 of Rd is also extracted and passed
        /// to <paramref name="emit"/> as its first argument.
        /// </summary>
        public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand d = context.VectorExtract(type, GetVec(op.Rd), 0);
            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0));
        }

        /// <summary>
        /// Reads element 0 of Rn with <c>EmitVectorExtractSx</c>, applies <paramref name="emit"/>, and
        /// writes the result into element 0 of an otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        /// <summary>
        /// Reads element 0 of Rn and Rm with <c>EmitVectorExtractSx</c>, applies <paramref name="emit"/>,
        /// and writes the result into element 0 of an otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        /// <summary>
        /// Reads element 0 of Rn with <c>EmitVectorExtractZx</c>, applies <paramref name="emit"/>, and
        /// writes the result into element 0 of an otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        /// <summary>
        /// Reads element 0 of Rn and Rm with <c>EmitVectorExtractZx</c>, applies <paramref name="emit"/>,
        /// and writes the result into element 0 of an otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

            Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        /// <summary>
        /// Reads element 0 of Rd, Rn and Rm with <c>EmitVectorExtractZx</c>, applies
        /// <paramref name="emit"/> to them in that order, and writes the result into element 0 of an
        /// otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size);
            Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size);
            Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size);

            d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size);

            context.Copy(GetVec(op.Rd), d);
        }

        /// <summary>
        /// Extracts element 0 of Rn as FP32 or FP64 (chosen by bit 0 of the opcode size), applies
        /// <paramref name="emit"/>, and writes the result into element 0 of an otherwise zero vector
        /// that is copied to Rd.
        /// </summary>
        public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0));
        }

        /// <summary>
        /// Extracts element 0 of Rn and Rm as FP32 or FP64 (chosen by bit 0 of the opcode size), applies
        /// <paramref name="emit"/>, and writes the result into element 0 of an otherwise zero vector
        /// that is copied to Rd.
        /// </summary>
        public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0));
        }

        /// <summary>
        /// Extracts element 0 of Ra, Rn and Rm as FP32 or FP64 (chosen by bit 0 of the opcode size),
        /// applies <paramref name="emit"/> to them in that order, and writes the result into element 0
        /// of an otherwise zero vector that is copied to Rd.
        /// </summary>
        public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32;

            Operand a = context.VectorExtract(type, GetVec(op.Ra), 0);
            Operand n = context.VectorExtract(type, GetVec(op.Rn), 0);
            Operand m = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0));
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to every floating-point element of Rn and writes the results,
        /// element by element, into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // '+' binds tighter than '>>': the byte count is shifted right by (sizeF + 2).
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);

                res = context.VectorInsert(res, emit(ne), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each pair of same-index floating-point elements of Rn and Rm
        /// and writes the results into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // '+' binds tighter than '>>': the byte count is shifted right by (sizeF + 2).
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each triple of same-index floating-point elements of Rd, Rn
        /// and Rm (in that order) and writes the results into a vector that starts as zero and is copied
        /// to Rd.
        /// </summary>
        public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // '+' binds tighter than '>>': the byte count is shifted right by (sizeF + 2).
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                Operand me = context.VectorExtract(type, GetVec(op.Rm), index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each floating-point element of Rn paired with the fixed
        /// element <c>op.Index</c> of Rm, and writes the results into a vector that starts as zero and
        /// is copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // '+' binds tighter than '>>': the byte count is shifted right by (sizeF + 2).
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                // The Rm element is re-extracted on every iteration, always from op.Index.
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each same-index floating-point element of Rd and Rn together
        /// with the fixed element <c>op.Index</c> of Rm, and writes the results into a vector that
        /// starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;

            Operand res = context.VectorZero();

            int sizeF = op.Size & 1;

            OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

            // '+' binds tighter than '>>': the byte count is shifted right by (sizeF + 2).
            int elems = op.GetBytesCount() >> sizeF + 2;

            for (int index = 0; index < elems; index++)
            {
                Operand de = context.VectorExtract(type, GetVec(op.Rd), index);
                Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
                // The Rm element is re-extracted on every iteration, always from op.Index.
                Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index);

                res = context.VectorInsert(res, emit(de, ne, me), index);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to every element of Rn, read with <c>EmitVectorExtractSx</c>,
        /// and writes the results into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each pair of same-index elements of Rn and Rm, read with
        /// <c>EmitVectorExtractSx</c>, and writes the results into a vector that starts as zero and is
        /// copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each triple of same-index elements of Rd, Rn and Rm (in that
        /// order), read with <c>EmitVectorExtractSx</c>, and writes the results into a vector that
        /// starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to every element of Rn, read with <c>EmitVectorExtractZx</c>,
        /// and writes the results into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each pair of same-index elements of Rn and Rm, read with
        /// <c>EmitVectorExtractZx</c>, and writes the results into a vector that starts as zero and is
        /// copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each triple of same-index elements of Rd, Rn and Rm (in that
        /// order), read with <c>EmitVectorExtractZx</c>, and writes the results into a vector that
        /// starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
                Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each element of Rn paired with element <c>op.Index</c> of
        /// Rm (extracted once, before the loop), both read with <c>EmitVectorExtractSx</c>, and writes
        /// the results into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size);

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each element of Rn paired with element <c>op.Index</c> of
        /// Rm (extracted once, before the loop), both read with <c>EmitVectorExtractZx</c>, and writes
        /// the results into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each same-index element of Rd and Rn together with element
        /// <c>op.Index</c> of Rm (extracted once, before the loop), all read with
        /// <c>EmitVectorExtractZx</c>, and writes the results into a vector that starts as zero and is
        /// copied to Rd.
        /// </summary>
        public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit)
        {
            OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;

            Operand res = context.VectorZero();

            Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size);

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);
                Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Calls <paramref name="emit"/> on the opcode's immediate once per element and writes each result
        /// into a vector that starts as zero and is copied to Rd.
        /// </summary>
        public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                res = EmitVectorInsert(context, res, emit(imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to each element of Rd (read with <c>EmitVectorExtractZx</c>)
        /// paired with the opcode's immediate, and writes the results into a vector that starts as zero
        /// and is copied to Rd.
        /// </summary>
        public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            Operand imm = Const(op.Immediate);

            Operand res = context.VectorZero();

            // Element count: register byte count shifted right by the element size.
            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

                res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Signed variant: forwards to the shared implementation with <c>signed: true</c>.
        /// </summary>
        public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: true);
        }

        /// <summary>
        /// Unsigned variant: forwards to the shared implementation with <c>signed: false</c>.
        /// </summary>
        public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRmBinaryOp(context, emit, signed: false);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to elements of Rn read at size + 1 and elements of Rm read at
        /// the opcode size, inserting each result at size + 1 into a vector that starts as zero and is
        /// copied to Rd.
        /// </summary>
        private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            // For a 128-bit register the Rm elements are taken starting at index 'elems' instead of 0.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Signed variant: forwards to the shared implementation with <c>signed: true</c>.
        /// </summary>
        public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: true);
        }

        /// <summary>
        /// Unsigned variant: forwards to the shared implementation with <c>signed: false</c>.
        /// </summary>
        public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit)
        {
            EmitVectorWidenRnRmBinaryOp(context, emit, signed: false);
        }

        /// <summary>
        /// Applies <paramref name="emit"/> to elements of Rn and Rm, both read at the opcode size, and
        /// inserts each result at size + 1 into a vector that starts as zero and is copied to Rd.
        /// </summary>
        private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            // For a 128-bit register both sources are read starting at index 'elems' instead of 0.
            int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

            for (int index = 0; index < elems; index++)
            {
                Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed);
                Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed);

                res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
            }

            context.Copy(GetVec(op.Rd), res);
        }

        /// <summary>
        /// Signed variant: forwards to the shared implementation with <c>signed: true</c>.
        /// </summary>
        public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: true);
        }

        /// <summary>
        /// Unsigned variant: forwards to the shared implementation with <c>signed: false</c>.
        /// </summary>
        public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit)
        {
            EmitVectorWidenRnRmTernaryOp(context, emit, signed: false);
        }

        private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = 8 >> op.Size;

            int part = op.RegisterSize == RegisterSize.Simd128 ?
elems : 0; 1077 1078 for (int index = 0; index < elems; index++) 1079 { 1080 Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); 1081 Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); 1082 Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); 1083 1084 res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); 1085 } 1086 1087 context.Copy(GetVec(op.Rd), res); 1088 } 1089 1090 public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) 1091 { 1092 EmitVectorWidenBinaryOpByElem(context, emit, signed: true); 1093 } 1094 1095 public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) 1096 { 1097 EmitVectorWidenBinaryOpByElem(context, emit, signed: false); 1098 } 1099 1100 private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed) 1101 { 1102 OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; 1103 1104 Operand res = context.VectorZero(); 1105 1106 Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); 1107 1108 int elems = 8 >> op.Size; 1109 1110 int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; 1111 1112 for (int index = 0; index < elems; index++) 1113 { 1114 Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); 1115 1116 res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); 1117 } 1118 1119 context.Copy(GetVec(op.Rd), res); 1120 } 1121 1122 public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit) 1123 { 1124 EmitVectorWidenTernaryOpByElem(context, emit, signed: true); 1125 } 1126 1127 public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) 1128 { 1129 EmitVectorWidenTernaryOpByElem(context, emit, signed: false); 1130 } 1131 1132 private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed) 1133 { 1134 OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; 1135 1136 Operand res = context.VectorZero(); 1137 1138 Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); 1139 1140 int elems = 8 >> op.Size; 1141 1142 int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; 1143 1144 for (int index = 0; index < elems; index++) 1145 { 1146 Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); 1147 Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); 1148 1149 res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); 1150 } 1151 1152 context.Copy(GetVec(op.Rd), res); 1153 } 1154 1155 public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit) 1156 { 1157 EmitVectorPairwiseOp(context, emit, signed: true); 1158 } 1159 1160 public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit) 1161 { 1162 EmitVectorPairwiseOp(context, emit, signed: false); 1163 } 1164 1165 private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed) 1166 { 1167 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1168 1169 Operand res = context.VectorZero(); 1170 1171 int pairs = op.GetPairsCount() >> op.Size; 1172 1173 for (int index = 0; index < pairs; index++) 1174 { 1175 int pairIndex = index << 1; 1176 1177 Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); 1178 Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); 1179 1180 Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); 1181 Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); 1182 1183 res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); 1184 res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); 1185 } 1186 1187 context.Copy(GetVec(op.Rd), res); 1188 } 1189 1190 public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst) 1191 { 1192 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1193 1194 Operand n = GetVec(op.Rn); 1195 Operand m = GetVec(op.Rm); 1196 1197 if (op.RegisterSize == RegisterSize.Simd64) 1198 { 1199 Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]); 
1200 Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]); 1201 1202 Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n 1203 1204 Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n 1205 Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n 1206 1207 context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); 1208 } 1209 else if (op.Size < 3) 1210 { 1211 Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]); 1212 1213 Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n 1214 Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m 1215 1216 Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM); 1217 Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM); 1218 1219 context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); 1220 } 1221 else 1222 { 1223 Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); 1224 Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m); 1225 1226 context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right)); 1227 } 1228 } 1229 1230 public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) 1231 { 1232 EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); 1233 } 1234 1235 public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) 1236 { 1237 EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); 1238 } 1239 1240 public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) 1241 { 1242 EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); 1243 } 1244 1245 public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) 1246 
{ 1247 EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); 1248 } 1249 1250 private static void EmitVectorAcrossVectorOp( 1251 ArmEmitterContext context, 1252 Func2I emit, 1253 bool signed, 1254 bool isLong) 1255 { 1256 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1257 1258 int elems = op.GetBytesCount() >> op.Size; 1259 1260 Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); 1261 1262 for (int index = 1; index < elems; index++) 1263 { 1264 Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); 1265 1266 res = emit(res, n); 1267 } 1268 1269 int size = isLong ? op.Size + 1 : op.Size; 1270 1271 Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); 1272 1273 context.Copy(GetVec(op.Rd), d); 1274 } 1275 1276 public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) 1277 { 1278 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1279 1280 Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); 1281 1282 Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); 1283 1284 for (int index = 1; index < 4; index++) 1285 { 1286 Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index); 1287 1288 res = emit(res, n); 1289 } 1290 1291 Operand d = context.VectorInsert(context.VectorZero(), res, 0); 1292 1293 context.Copy(GetVec(op.Rd), d); 1294 } 1295 1296 public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) 1297 { 1298 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1299 1300 Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); 1301 1302 const int SM0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0; 1303 const int SM1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0; 1304 const int SM2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0; 1305 const int SM3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0; 1306 1307 Operand nCopy = context.Copy(GetVec(op.Rn)); 1308 1309 Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, 
nCopy, Const(SM0)); 1310 Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM1)); 1311 Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM2)); 1312 Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(SM3)); 1313 1314 Operand res = emit(emit(part0, part1), emit(part2, part3)); 1315 1316 context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); 1317 } 1318 1319 public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) 1320 { 1321 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1322 1323 OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; 1324 1325 Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); 1326 Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); 1327 1328 Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0); 1329 1330 context.Copy(GetVec(op.Rd), res); 1331 } 1332 1333 public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) 1334 { 1335 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1336 1337 Operand n = GetVec(op.Rn); 1338 1339 Operand op0, op1; 1340 1341 if ((op.Size & 1) == 0) 1342 { 1343 const int SM0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0; 1344 const int SM1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0; 1345 1346 Operand zeroN = context.VectorZeroUpper64(n); 1347 1348 op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM0)); 1349 op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(SM1)); 1350 } 1351 else /* if ((op.Size & 1) == 1) */ 1352 { 1353 Operand zero = context.VectorZero(); 1354 1355 op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero); 1356 op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n); 1357 } 1358 1359 context.Copy(GetVec(op.Rd), emit(op0, op1)); 1360 } 1361 1362 public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) 1363 { 1364 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1365 1366 Operand res = 
context.VectorZero(); 1367 1368 int sizeF = op.Size & 1; 1369 1370 OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; 1371 1372 int pairs = op.GetPairsCount() >> sizeF + 2; 1373 1374 for (int index = 0; index < pairs; index++) 1375 { 1376 int pairIndex = index << 1; 1377 1378 Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex); 1379 Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1); 1380 1381 Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex); 1382 Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1); 1383 1384 res = context.VectorInsert(res, emit(n0, n1), index); 1385 res = context.VectorInsert(res, emit(m0, m1), pairs + index); 1386 } 1387 1388 context.Copy(GetVec(op.Rd), res); 1389 } 1390 1391 public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit) 1392 { 1393 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; 1394 1395 Operand nCopy = context.Copy(GetVec(op.Rn)); 1396 Operand mCopy = context.Copy(GetVec(op.Rm)); 1397 1398 int sizeF = op.Size & 1; 1399 1400 if (sizeF == 0) 1401 { 1402 if (op.RegisterSize == RegisterSize.Simd64) 1403 { 1404 Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy); 1405 1406 Operand zero = context.VectorZero(); 1407 1408 Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); 1409 Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); 1410 1411 context.Copy(GetVec(op.Rd), emit(part0, part1)); 1412 } 1413 else /* if (op.RegisterSize == RegisterSize.Simd128) */ 1414 { 1415 const int SM0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; 1416 const int SM1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; 1417 1418 Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM0)); 1419 Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(SM1)); 1420 1421 context.Copy(GetVec(op.Rd), emit(part0, part1)); 1422 } 1423 } 1424 else /* if (sizeF == 1) */ 1425 
{ 1426 Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy); 1427 Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy); 1428 1429 context.Copy(GetVec(op.Rd), emit(part0, part1)); 1430 } 1431 } 1432 1433 public enum CmpCondition 1434 { 1435 // Legacy Sse. 1436 Equal = 0, // Ordered, non-signaling. 1437 LessThan = 1, // Ordered, signaling. 1438 LessThanOrEqual = 2, // Ordered, signaling. 1439 UnorderedQ = 3, // Non-signaling. 1440 NotLessThan = 5, // Unordered, signaling. 1441 NotLessThanOrEqual = 6, // Unordered, signaling. 1442 OrderedQ = 7, // Non-signaling. 1443 1444 // Vex. 1445 GreaterThanOrEqual = 13, // Ordered, signaling. 1446 GreaterThan = 14, // Ordered, signaling. 1447 OrderedS = 23, // Signaling. 1448 } 1449 1450 [Flags] 1451 public enum SaturatingFlags 1452 { 1453 None = 0, 1454 1455 ByElem = 1 << 0, 1456 Scalar = 1 << 1, 1457 Signed = 1 << 2, 1458 1459 Add = 1 << 3, 1460 Sub = 1 << 4, 1461 1462 Accumulate = 1 << 5, 1463 } 1464 1465 public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) 1466 { 1467 EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed); 1468 } 1469 1470 public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) 1471 { 1472 EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed); 1473 } 1474 1475 public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags) 1476 { 1477 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1478 1479 Operand res = context.VectorZero(); 1480 1481 bool scalar = (flags & SaturatingFlags.Scalar) != 0; 1482 1483 int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; 1484 1485 for (int index = 0; index < elems; index++) 1486 { 1487 Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); 1488 Operand de; 1489 1490 if (op.Size <= 2) 1491 { 1492 de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true); 1493 } 1494 else /* if (op.Size == 3) */ 1495 { 1496 de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); 1497 } 1498 1499 res = EmitVectorInsert(context, res, de, index, op.Size); 1500 } 1501 1502 context.Copy(GetVec(op.Rd), res); 1503 } 1504 1505 public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) 1506 { 1507 EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags); 1508 } 1509 1510 public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) 1511 { 1512 EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags); 1513 } 1514 1515 public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) 1516 { 1517 EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags); 1518 } 1519 1520 public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) 1521 { 1522 EmitSaturatingBinaryOp(context, null, flags); 1523 } 1524 1525 public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) 1526 { 1527 EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed); 1528 } 1529 1530 public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags) 1531 { 1532 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1533 1534 Operand res = context.VectorZero(); 1535 1536 bool byElem = (flags & SaturatingFlags.ByElem) != 0; 1537 bool scalar = (flags & SaturatingFlags.Scalar) != 0; 1538 bool signed = (flags & 
SaturatingFlags.Signed) != 0; 1539 1540 bool add = (flags & SaturatingFlags.Add) != 0; 1541 bool sub = (flags & SaturatingFlags.Sub) != 0; 1542 1543 bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; 1544 1545 int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; 1546 1547 if (add || sub) 1548 { 1549 for (int index = 0; index < elems; index++) 1550 { 1551 Operand de; 1552 Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); 1553 Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); 1554 1555 if (op.Size <= 2) 1556 { 1557 Operand temp = add ? context.Add(ne, me) : context.Subtract(ne, me); 1558 1559 de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); 1560 } 1561 else /* if (op.Size == 3) */ 1562 { 1563 if (add) 1564 { 1565 de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me); 1566 } 1567 else /* if (sub) */ 1568 { 1569 de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me); 1570 } 1571 } 1572 1573 res = EmitVectorInsert(context, res, de, index, op.Size); 1574 } 1575 } 1576 else if (accumulate) 1577 { 1578 for (int index = 0; index < elems; index++) 1579 { 1580 Operand de; 1581 Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); 1582 Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); 1583 1584 if (op.Size <= 2) 1585 { 1586 Operand temp = context.Add(ne, me); 1587 1588 de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); 1589 } 1590 else /* if (op.Size == 3) */ 1591 { 1592 de = signed ? 
EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me); 1593 } 1594 1595 res = EmitVectorInsert(context, res, de, index, op.Size); 1596 } 1597 } 1598 else 1599 { 1600 Operand me = default; 1601 1602 if (byElem) 1603 { 1604 OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op; 1605 1606 me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed); 1607 } 1608 1609 for (int index = 0; index < elems; index++) 1610 { 1611 Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); 1612 1613 if (!byElem) 1614 { 1615 me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); 1616 } 1617 1618 Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed); 1619 1620 res = EmitVectorInsert(context, res, de, index, op.Size); 1621 } 1622 } 1623 1624 context.Copy(GetVec(op.Rd), res); 1625 } 1626 1627 [Flags] 1628 public enum SaturatingNarrowFlags 1629 { 1630 Scalar = 1 << 0, 1631 SignedSrc = 1 << 1, 1632 SignedDst = 1 << 2, 1633 1634 ScalarSxSx = Scalar | SignedSrc | SignedDst, 1635 ScalarSxZx = Scalar | SignedSrc, 1636 ScalarZxZx = Scalar, 1637 1638 VectorSxSx = SignedSrc | SignedDst, 1639 VectorSxZx = SignedSrc, 1640 VectorZxZx = 0, 1641 } 1642 1643 public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags) 1644 { 1645 OpCodeSimd op = (OpCodeSimd)context.CurrOp; 1646 1647 bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; 1648 bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; 1649 bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; 1650 1651 int elems = !scalar ? 8 >> op.Size : 1; 1652 1653 int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; 1654 1655 Operand d = GetVec(op.Rd); 1656 1657 Operand res = part == 0 ? 
context.VectorZero() : context.Copy(d); 1658 1659 for (int index = 0; index < elems; index++) 1660 { 1661 Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); 1662 1663 Operand temp = signedSrc 1664 ? EmitSignedSrcSatQ(context, ne, op.Size, signedDst) 1665 : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst); 1666 1667 res = EmitVectorInsert(context, res, temp, part + index, op.Size); 1668 } 1669 1670 context.Copy(d, res); 1671 } 1672 1673 // long SignedSignSatQ(long op, int size); 1674 public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size) 1675 { 1676 int eSize = 8 << size; 1677 1678 Debug.Assert(op.Type == OperandType.I64); 1679 Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); 1680 1681 Operand lbl1 = Label(); 1682 Operand lblEnd = Label(); 1683 1684 Operand zeroL = Const(0L); 1685 Operand maxT = Const((1L << (eSize - 1)) - 1L); 1686 Operand minT = Const(-(1L << (eSize - 1))); 1687 1688 Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL); 1689 1690 context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual); 1691 context.Copy(res, maxT); 1692 SetFpFlag(context, FPState.QcFlag, Const(1)); 1693 context.Branch(lblEnd); 1694 1695 context.MarkLabel(lbl1); 1696 context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual); 1697 context.Copy(res, minT); 1698 SetFpFlag(context, FPState.QcFlag, Const(1)); 1699 context.Branch(lblEnd); 1700 1701 context.MarkLabel(lblEnd); 1702 1703 return res; 1704 } 1705 1706 // private static ulong UnsignedSignSatQ(ulong op, int size); 1707 public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size) 1708 { 1709 int eSize = 8 << size; 1710 1711 Debug.Assert(op.Type == OperandType.I64); 1712 Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); 1713 1714 Operand lblEnd = Label(); 1715 1716 Operand zeroUL = Const(0UL); 1717 Operand maxT = Const(ulong.MaxValue >> (64 - eSize)); 1718 1719 
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL); 1720 1721 context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI); 1722 context.Copy(res, maxT); 1723 SetFpFlag(context, FPState.QcFlag, Const(1)); 1724 context.Branch(lblEnd); 1725 1726 context.MarkLabel(lblEnd); 1727 1728 return res; 1729 } 1730 1731 // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned). 1732 // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size); 1733 public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst) 1734 { 1735 int eSizeDst = 8 << sizeDst; 1736 1737 Debug.Assert(op.Type == OperandType.I64); 1738 Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32); 1739 1740 Operand lbl1 = Label(); 1741 Operand lblEnd = Label(); 1742 1743 Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL); 1744 Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL); 1745 1746 Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); 1747 1748 context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual); 1749 context.Copy(res, maxT); 1750 SetFpFlag(context, FPState.QcFlag, Const(1)); 1751 context.Branch(lblEnd); 1752 1753 context.MarkLabel(lbl1); 1754 context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual); 1755 context.Copy(res, minT); 1756 SetFpFlag(context, FPState.QcFlag, Const(1)); 1757 context.Branch(lblEnd); 1758 1759 context.MarkLabel(lblEnd); 1760 1761 return res; 1762 } 1763 1764 // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). 
// long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size);
        public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst)
        {
            int dstBits = 8 << sizeDst;

            Debug.Assert(op.Type == OperandType.I64);
            Debug.Assert(dstBits == 8 || dstBits == 16 || dstBits == 32);

            Operand done = Label();

            // Largest value representable by the destination element type.
            Operand ceiling;

            if (signedDst)
            {
                ceiling = Const((1L << (dstBits - 1)) - 1L);
            }
            else
            {
                ceiling = Const((1UL << dstBits) - 1UL);
            }

            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), op);

            // Unsigned compare: values within range are passed through untouched.
            context.BranchIf(done, op, ceiling, Comparison.LessOrEqualUI);
            EmitSetSaturatedAndExit(context, result, ceiling, done);

            context.MarkLabel(done);

            return result;
        }

        // Shared tail of the saturation paths below: stores the saturated value into the
        // result local, sets the QC flag and jumps to the exit label.
        private static void EmitSetSaturatedAndExit(ArmEmitterContext context, Operand result, Operand saturated, Operand exit)
        {
            context.Copy(result, saturated);
            SetFpFlag(context, FPState.QcFlag, Const(1));
            context.Branch(exit);
        }

        // long UnarySignedSatQAbsOrNeg(long op);
        // Passes "op" through unless it equals long.MinValue, in which case the result is
        // long.MaxValue and the QC flag is set.
        private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.I64);

            Operand done = Label();

            Operand lowest = Const(long.MinValue);
            Operand highest = Const(long.MaxValue);

            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), op);

            context.BranchIf(done, op, lowest, Comparison.NotEqual);
            EmitSetSaturatedAndExit(context, result, highest, done);

            context.MarkLabel(done);

            return result;
        }

        // long BinarySignedSatQAdd(long op1, long op2);
        // Signed 64-bit saturating addition.
        public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand done = Label();

            Operand lowest = Const(long.MinValue);
            Operand highest = Const(long.MaxValue);
            Operand zero = Const(0L);

            Operand sum = context.Add(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

            // Overflow iff both operands share a sign and the sum's sign differs from it;
            // in that case the sign bit of (sameSign & signChanged) is set.
            Operand sameSign = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2));
            Operand signChanged = context.BitwiseExclusiveOr(op1, sum);
            Operand overflow = context.BitwiseAnd(sameSign, signChanged);

            context.BranchIf(done, overflow, zero, Comparison.GreaterOrEqual);

            // Saturate towards the sign of the first operand.
            Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
            EmitSetSaturatedAndExit(context, result, context.ConditionalSelect(op1NonNegative, highest, lowest), done);

            context.MarkLabel(done);

            return result;
        }

        // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2);
        // Unsigned 64-bit saturating addition.
        public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand done = Label();

            Operand allOnes = Const(ulong.MaxValue);

            Operand sum = context.Add(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

            // A wrapped sum is smaller (unsigned) than its first operand.
            context.BranchIf(done, sum, op1, Comparison.GreaterOrEqualUI);
            EmitSetSaturatedAndExit(context, result, allOnes, done);

            context.MarkLabel(done);

            return result;
        }

        // long BinarySignedSatQSub(long op1, long op2);
        // Signed 64-bit saturating subtraction.
        public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand done = Label();

            Operand lowest = Const(long.MinValue);
            Operand highest = Const(long.MaxValue);
            Operand zero = Const(0L);

            Operand difference = context.Subtract(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), difference);

            // Overflow iff the operands differ in sign and the result's sign differs from op1.
            Operand signsDiffer = context.BitwiseExclusiveOr(op1, op2);
            Operand signChanged = context.BitwiseExclusiveOr(op1, difference);
            Operand overflow = context.BitwiseAnd(signsDiffer, signChanged);

            context.BranchIf(done, overflow, zero, Comparison.GreaterOrEqual);

            Operand op1NonNegative = context.ICompareGreaterOrEqual(op1, zero);
            EmitSetSaturatedAndExit(context, result, context.ConditionalSelect(op1NonNegative, highest, lowest), done);

            context.MarkLabel(done);

            return result;
        }

        // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2);
        // Unsigned 64-bit saturating subtraction (clamps to zero).
        public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand done = Label();

            Operand zero = Const(0L);

            Operand difference = context.Subtract(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), difference);

            // No borrow when op1 >= op2 (unsigned).
            context.BranchIf(done, op1, op2, Comparison.GreaterOrEqualUI);
            EmitSetSaturatedAndExit(context, result, zero, done);

            context.MarkLabel(done);

            return result;
        }

        // long BinarySignedSatQAcc(ulong op1, long op2);
        // Adds an unsigned value to a signed accumulator, saturating at long.MaxValue.
        private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand op1IsLarge = Label();
            Operand op2IsNegative = Label();
            Operand done = Label();

            Operand highest = Const(long.MaxValue);
            Operand zero = Const(0L);

            Operand sum = context.Add(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

            context.BranchIf(op1IsLarge, op1, highest, Comparison.GreaterUI);

            // op1 <= long.MaxValue: overflow only if op2 is non-negative and the sum is negative.
            Operand nonNegativeToNegative = context.BitwiseAnd(context.BitwiseNot(op2), sum);
            context.BranchIf(done, nonNegativeToNegative, zero, Comparison.GreaterOrEqual);
            EmitSetSaturatedAndExit(context, result, highest, done);

            // op1 > long.MaxValue: a non-negative op2 always saturates.
            context.MarkLabel(op1IsLarge);
            context.BranchIf(op2IsNegative, op2, zero, Comparison.Less);
            EmitSetSaturatedAndExit(context, result, highest, done);

            // op1 > long.MaxValue and op2 < 0: fine as long as the sum fits in a long.
            context.MarkLabel(op2IsNegative);
            context.BranchIf(done, sum, highest, Comparison.LessOrEqualUI);
            EmitSetSaturatedAndExit(context, result, highest, done);

            context.MarkLabel(done);

            return result;
        }

        // ulong BinaryUnsignedSatQAcc(long op1, ulong op2);
        // Adds a signed value to an unsigned accumulator, saturating at 0 and ulong.MaxValue.
        private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2)
        {
            Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64);

            Operand op1IsNegative = Label();
            Operand done = Label();

            Operand allOnes = Const(ulong.MaxValue);
            Operand highestSigned = Const(long.MaxValue);
            Operand zero = Const(0L);

            Operand sum = context.Add(op1, op2);
            Operand result = context.Copy(context.AllocateLocal(OperandType.I64), sum);

            context.BranchIf(op1IsNegative, op1, zero, Comparison.Less);

            // op1 >= 0: behaves like an unsigned add; a wrapped sum is below op1.
            context.BranchIf(done, sum, op1, Comparison.GreaterOrEqualUI);
            EmitSetSaturatedAndExit(context, result, allOnes, done);

            // op1 < 0: the sum cannot underflow when op2 > long.MaxValue; otherwise a
            // negative (signed) sum means the true result is below zero.
            context.MarkLabel(op1IsNegative);
            context.BranchIf(done, op2, highestSigned, Comparison.GreaterUI);
            context.BranchIf(done, sum, zero, Comparison.GreaterOrEqual);
            EmitSetSaturatedAndExit(context, result, zero, done);

            context.MarkLabel(done);

            return result;
        }

        // Clears the sign bit of "value" by AND-NOT-ing it with a negative-zero mask
        // (single or double precision, scalar or broadcast to all elements).
        public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector)
        {
            if (single)
            {
                Operand signMask32 = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f);

                return context.AddIntrinsic(Intrinsic.X86Andnps, signMask32, value);
            }

            Operand signMask64 = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d);

            return context.AddIntrinsic(Intrinsic.X86Andnpd, signMask64, value);
        }

        // Extracts element "index" of register "reg" and sign-extends it to 64 bits.
        public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size)
            => EmitVectorExtract(context, reg, index, size, signed: true);

        // Extracts element "index" of register "reg" and zero-extends it to 64 bits.
        public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size)
            => EmitVectorExtract(context, reg, index, size, signed: false);

        // Extracts element "index" (of 8 << size bits) from vector register "reg".
        // Elements narrower than 64 bits are sign- or zero-extended to I64 according to
        // "signed"; 64-bit elements are returned as extracted.
        // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for a bad size or index.
        public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed)
        {
            ThrowIfInvalid(index, size);

            Operand source = GetVec(reg);

            switch (size)
            {
                case 0:
                {
                    Operand raw = context.VectorExtract8(source, index);

                    return signed
                        ? context.SignExtend8(OperandType.I64, raw)
                        : context.ZeroExtend8(OperandType.I64, raw);
                }

                case 1:
                {
                    Operand raw = context.VectorExtract16(source, index);

                    return signed
                        ? context.SignExtend16(OperandType.I64, raw)
                        : context.ZeroExtend16(OperandType.I64, raw);
                }

                case 2:
                {
                    Operand raw = context.VectorExtract(OperandType.I32, source, index);

                    return signed
                        ? context.SignExtend32(OperandType.I64, raw)
                        : context.ZeroExtend32(OperandType.I64, raw);
                }

                default:
                    // size == 3: ThrowIfInvalid has already rejected every other value.
                    return context.VectorExtract(OperandType.I64, source, index);
            }
        }

        // Returns "vector" with element "index" (of 8 << size bits) replaced by "value".
        // An I64 value is first converted to I32 when the element is narrower than 64 bits.
        // Throws ArgumentOutOfRangeException (via ThrowIfInvalid) for a bad size or index.
        public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size)
        {
            ThrowIfInvalid(index, size);

            Operand element = value;

            if (size < 3 && element.Type == OperandType.I64)
            {
                element = context.ConvertI64ToI32(element);
            }

            switch (size)
            {
                case 0:
                    return context.VectorInsert8(vector, element, index);

                case 1:
                    return context.VectorInsert16(vector, element, index);

                default:
                    // Sizes 2 and 3 share the generic insert.
                    return context.VectorInsert(vector, element, index);
            }
        }

        // Validates an element size (0..3) and an element index (0 .. (16 >> size) - 1),
        // throwing ArgumentOutOfRangeException naming the offending parameter.
        public static void ThrowIfInvalid(int index, int size)
        {
            bool sizeInRange = size >= 0 && size <= 3;

            if (!sizeInRange)
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            int elementCount = 16 >> size;

            if (index < 0 || index >= elementCount)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }
        }
    }
}