// InstEmitSimdLogical.cs
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    // Emitters for the A64 SIMD bitwise-logical instruction group (AND, BIC, BIF,
    // BIT, BSL, EOR, NOT, ORN, ORR, RBIT, REV16/32/64). Each emitter picks the best
    // available host path: native AdvSimd intrinsic, x86 SSE/AVX intrinsics, or a
    // generic per-element IR fallback.
    static partial class InstEmit
    {
        // AND (vector): Rd = Rn & Rm.
        public static void And_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m);

                // 64-bit vector forms write zeros to the upper 64 bits of Rd.
                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2));
            }
        }

        // BIC (vector, register): Rd = Rn & ~Rm.
        public static void Bic_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // X86 PANDN computes ~src1 & src2, so the operands are deliberately
                // swapped (m first) to get n & ~m.
                Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.BitwiseAnd(op1, context.BitwiseNot(op2));
                });
            }
        }

        // BIC (vector, immediate): Rd = Rd & ~imm.
        public static void Bic_Vi(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

                int eSize = 8 << op.Size;

                Operand d = GetVec(op.Rd);
                // The immediate is inverted up front so a plain PAND performs the BIC.
                // The encoding only allows 16- and 32-bit element sizes.
                Operand imm = eSize switch
                {
                    16 => X86GetAllElements(context, (short)~op.Immediate),
                    32 => X86GetAllElements(context, (int)~op.Immediate),
                    _ => throw new InvalidOperationException($"Invalid element size {eSize}."),
                };

                Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorImmBinaryOp(context, (op1, op2) =>
                {
                    return context.BitwiseAnd(op1, context.BitwiseNot(op2));
                });
            }
        }

        // BIF: bitwise insert if false — Rd bits are replaced by Rn bits where Rm is 0.
        public static void Bif_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
            }
            else
            {
                EmitBifBit(context, notRm: true);
            }
        }

        // BIT: bitwise insert if true — Rd bits are replaced by Rn bits where Rm is 1.
        public static void Bit_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
            }
            else
            {
                EmitBifBit(context, notRm: false);
            }
        }

        // Shared BIF/BIT implementation: Rd = Rd ^ ((Rd ^ Rn) & mask), where mask is
        // Rm for BIT and ~Rm for BIF (selected by notRm).
        private static void EmitBifBit(ArmEmitterContext context, bool notRm)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);

                if (notRm)
                {
                    // PANDN gives ~m & res, i.e. the BIF mask, in one instruction.
                    res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
                }
                else
                {
                    res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
                }

                res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else
            {
                Operand res = context.VectorZero();

                // The operation is bitwise, so the fallback works on 64-bit lanes
                // (size 3): two for a 128-bit vector, one for a 64-bit vector.
                int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;

                for (int index = 0; index < elems; index++)
                {
                    Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
                    Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
                    Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);

                    if (notRm)
                    {
                        m = context.BitwiseNot(m);
                    }

                    Operand e = context.BitwiseExclusiveOr(d, n);

                    e = context.BitwiseAnd(e, m);
                    e = context.BitwiseExclusiveOr(e, d);

                    res = EmitVectorInsert(context, res, e, index, 3);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }

        // BSL: bitwise select — Rd = (Rd & Rn) | (~Rd & Rm), computed via the
        // xor-and-xor form ((Rn ^ Rm) & Rd) ^ Rm.
        public static void Bsl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
                res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else
            {
                EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
                {
                    return context.BitwiseExclusiveOr(
                        context.BitwiseAnd(op1,
                        context.BitwiseExclusiveOr(op2, op3)), op3);
                });
            }
        }

        // EOR (vector): Rd = Rn ^ Rm.
        public static void Eor_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
            }
        }

        // NOT (vector): Rd = ~Rn.
        public static void Not_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAvx512Ortho)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                // VPTERNLOGD truth table: 0b10101010 selects operand A, so its
                // complement computes ~A in a single instruction.
                Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Operand mask = X86GetAllElements(context, -1L);

                // PANDN(n, all-ones) = ~n & ~0 = ~n.
                Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1));
            }
        }

        // ORN (vector): Rd = Rn | ~Rm.
        public static void Orn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
            }
            else if (Optimizations.UseAvx512Ortho)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                // VPTERNLOGD immediate: 0b11001100 is operand B (m here? see note) —
                // combined tables give A | ~B, i.e. n | ~m, in one instruction.
                Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand mask = X86GetAllElements(context, -1L);

                // PANDN(m, all-ones) = ~m; then OR with n.
                Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);

                res = context.AddIntrinsic(Intrinsic.X86Por, res, n);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return context.BitwiseOr(op1, context.BitwiseNot(op2));
                });
            }
        }

        // ORR (vector, register): Rd = Rn | Rm.
        public static void Orr_V(ArmEmitterContext context)
        {
            if (Optimizations.UseAdvSimd)
            {
                InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
            }
            else if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2));
            }
        }

        // ORR (vector, immediate): Rd = Rd | imm.
        public static void Orr_Vi(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

                int eSize = 8 << op.Size;

                Operand d = GetVec(op.Rd);
                // The encoding only allows 16- and 32-bit element sizes.
                Operand imm = eSize switch
                {
                    16 => X86GetAllElements(context, (short)op.Immediate),
                    32 => X86GetAllElements(context, (int)op.Immediate),
                    _ => throw new InvalidOperationException($"Invalid element size {eSize}."),
                };

                Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2));
            }
        }

        // RBIT (vector): reverses the bit order inside every byte of Rn.
        public static void Rbit_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            if (Optimizations.UseGfni)
            {
                // GF2P8AFFINEQB affine matrix whose rows pick the source bits in
                // reverse order, so each byte of the result is its input bit-reversed.
                const long BitMatrix =
                    (0b10000000L << 56) |
                    (0b01000000L << 48) |
                    (0b00100000L << 40) |
                    (0b00010000L << 32) |
                    (0b00001000L << 24) |
                    (0b00000100L << 16) |
                    (0b00000010L << 8) |
                    (0b00000001L << 0);

                Operand vBitMatrix = X86GetAllElements(context, BitMatrix);

                Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();
                int elems = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

                // Per-byte fallback: extract, bit-reverse, and reinsert each byte.
                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0);

                    Operand de = EmitReverseBits8Op(context, ne);

                    res = EmitVectorInsert(context, res, de, index, 0);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }

        // Reverses the low 8 bits of op using the classic mask-and-shift swap:
        // adjacent bits, then bit pairs, then nibbles.
        private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op)
        {
            Debug.Assert(op.Type == OperandType.I64);

            Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)),
                                            context.ShiftLeft(context.BitwiseAnd(op, Const(0x55ul)), Const(1)));

            val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)),
                                    context.ShiftLeft(context.BitwiseAnd(val, Const(0x33ul)), Const(2)));

            return context.BitwiseOr(context.ShiftRightUI(val, Const(4)),
                                     context.ShiftLeft(context.BitwiseAnd(val, Const(0x0ful)), Const(4)));
        }

        // REV16 (vector): reverses the byte order within each 16-bit halfword.
        public static void Rev16_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                // PSHUFB control bytes: each value is the source byte index, here
                // swapping every adjacent byte pair.
                const long MaskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0;
                const long MaskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0;

                Operand mask = X86GetScalar(context, MaskE0);

                mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitRev_V(context, containerSize: 1);
            }
        }

        // REV32 (vector): reverses the order of 8- or 16-bit elements (per op.Size)
        // within each 32-bit word.
        public static void Rev32_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Operand mask;

                if (op.Size == 0)
                {
                    // Byte elements: full byte reversal within each 32-bit word.
                    const long MaskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0;
                    const long MaskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0;

                    mask = X86GetScalar(context, MaskE0);

                    mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
                }
                else /* if (op.Size == 1) */
                {
                    // Halfword elements: swap the two halfwords of each 32-bit word.
                    const long MaskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0;
                    const long MaskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0;

                    mask = X86GetScalar(context, MaskE0);

                    mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitRev_V(context, containerSize: 2);
            }
        }

        // REV64 (vector): reverses the order of 8-, 16-, or 32-bit elements
        // (per op.Size) within each 64-bit doubleword.
        public static void Rev64_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Operand mask;

                if (op.Size == 0)
                {
                    // Byte elements: full byte reversal within each doubleword.
                    const long MaskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
                    const long MaskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;

                    mask = X86GetScalar(context, MaskE0);

                    mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
                }
                else if (op.Size == 1)
                {
                    // Halfword elements: reverse the four halfwords of each doubleword.
                    const long MaskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
                    const long MaskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;

                    mask = X86GetScalar(context, MaskE0);

                    mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
                }
                else /* if (op.Size == 2) */
                {
                    // Word elements: swap the two words of each doubleword.
                    const long MaskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
                    const long MaskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;

                    mask = X86GetScalar(context, MaskE0);

                    mask = EmitVectorInsert(context, mask, Const(MaskE1), 1, 3);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitRev_V(context, containerSize: 3);
            }
        }

        // Generic REV fallback: reverses the order of size-2^op.Size elements within
        // each container of 2^containerSize bytes, element by element.
        private static void EmitRev_V(ArmEmitterContext context, int containerSize)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            // Mask over the element-index bits that address positions inside one
            // container; XOR-ing with it mirrors the element within its container.
            int containerMask = (1 << (containerSize - op.Size)) - 1;

            for (int index = 0; index < elems; index++)
            {
                int revIndex = index ^ containerMask;

                Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size);

                res = EmitVectorInsert(context, res, ne, index, op.Size);
            }

            context.Copy(GetVec(op.Rd), res);
        }
    }
}