InstEmitSimdMove.cs
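// AArch64 SIMD move/permute instruction emitters (DUP, EXT, FCSEL, FMOV, INS,
// MOVI/MVNI, SMOV/UMOV, TBL/TBX, TRN, UZP, XTN, ZIP). Each handler has an
// SSE-accelerated path guarded by the Optimizations flags and a portable
// element-by-element fallback.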
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using System.Reflection;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    static partial class InstEmit
    {
        #region "Masks"
        private static readonly long[] _masksE0_Uzp = new long[]
        {
            13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
            11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0,
        };

        private static readonly long[] _masksE1_Uzp = new long[]
        {
            15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
            15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0,
        };
        #endregion

        public static void Dup_Gp(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand n = GetIntOrZR(context, op.Rn);

            if (Optimizations.UseSse2)
            {
                switch (op.Size)
                {
                    case 0:
                        n = context.ZeroExtend8(n.Type, n);
                        n = context.Multiply(n, Const(n.Type, 0x01010101));
                        break;
                    case 1:
                        n = context.ZeroExtend16(n.Type, n);
                        n = context.Multiply(n, Const(n.Type, 0x00010001));
                        break;
                    case 2:
                        n = context.ZeroExtend32(n.Type, n);
                        break;
                }

                Operand res = context.VectorInsert(context.VectorZero(), n, 0);

                if (op.Size < 3)
                {
                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
                    }
                    else
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
                    }
                }
                else
                {
                    res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = op.GetBytesCount() >> op.Size;

                for (int index = 0; index < elems; index++)
                {
                    res = EmitVectorInsert(context, res, n, index, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }

        public static void Dup_S(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
        }

        public static void Dup_V(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand res = GetVec(op.Rn);

                if (op.Size == 0)
                {
                    if (op.DstIndex != 0)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
                    }

                    res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
                    res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
                }
                else if (op.Size == 1)
                {
                    if (op.DstIndex != 0)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
                    }

                    res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
                }
                else if (op.Size == 2)
                {
                    int mask = op.DstIndex * 0b01010101;

                    res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
                }
                else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
                {
                    res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
                }
                else if (op.DstIndex == 1)
                {
                    res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
                }

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

                Operand res = context.VectorZero();

                int elems = op.GetBytesCount() >> op.Size;

                for (int index = 0; index < elems; index++)
                {
                    res = EmitVectorInsert(context, res, ne, index, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
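        // EXT extracts a 16 (or 8) byte window from the pair Vm:Vn: the bytes of
        // Vn from Imm4 upward form the low part of the result, and the low bytes
        // of Vm fill the remainder. The SSE2 path shifts N right and M left by
        // complementary byte counts and ORs the halves together.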
        public static void Ext_V(ArmEmitterContext context)
        {
            OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand nShifted = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    nShifted = context.VectorZeroUpper64(nShifted);
                }

                nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));

                Operand mShifted = GetVec(op.Rm);

                mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    mShifted = context.VectorZeroUpper64(mShifted);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int bytes = op.GetBytesCount();

                int position = op.Imm4 & (bytes - 1);

                for (int index = 0; index < bytes; index++)
                {
                    int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;

                    Operand e = EmitVectorExtractZx(context, reg, position, 0);

                    position = (position + 1) & (bytes - 1);

                    res = EmitVectorInsert(context, res, e, index, 0);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }

        public static void Fcsel_S(ArmEmitterContext context)
        {
            OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;

            Operand lblTrue = Label();
            Operand lblEnd = Label();

            Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);

            context.BranchIfTrue(lblTrue, isTrue);

            OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;

            Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));

            context.Branch(lblEnd);

            context.MarkLabel(lblTrue);

            Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));

            context.MarkLabel(lblEnd);
        }
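        // The Fmov_* handlers move raw bits between general purpose and SIMD
        // registers without any conversion. The "1" variants implement the
        // FMOV Xd, Vn.D[1] / FMOV Vd.D[1], Xn forms, which access the upper
        // 64 bits of the vector.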
        public static void Fmov_Ftoi(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);

            SetIntOrZR(context, op.Rd, ne);
        }

        public static void Fmov_Ftoi1(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);

            SetIntOrZR(context, op.Rd, ne);
        }

        public static void Fmov_Itof(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetIntOrZR(context, op.Rn);

            context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
        }

        public static void Fmov_Itof1(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand d = GetVec(op.Rd);
            Operand n = GetIntOrZR(context, op.Rn);

            context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
        }

        public static void Fmov_S(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;

            Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);

            context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
        }

        public static void Fmov_Si(ArmEmitterContext context)
        {
            OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                if (op.Size == 0)
                {
                    context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
                }
                else
                {
                    context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
                }
            }
            else
            {
                Operand e = Const(op.Immediate);

                Operand res = context.VectorZero();

                res = EmitVectorInsert(context, res, e, 0, op.Size + 2);

                context.Copy(GetVec(op.Rd), res);
            }
        }

        public static void Fmov_Vi(ArmEmitterContext context)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate));
                }
                else
                {
                    context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
                }
            }
            else
            {
                Operand e = Const(op.Immediate);

                Operand res = context.VectorZero();

                int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;

                for (int index = 0; index < elems; index++)
                {
                    res = EmitVectorInsert(context, res, e, index, 3);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
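        // INS, SMOV and UMOV move a single element between a vector lane and a
        // general purpose register: INS inserts, SMOV sign-extends the extracted
        // element and UMOV zero-extends it.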
        public static void Ins_Gp(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand d = GetVec(op.Rd);
            Operand n = GetIntOrZR(context, op.Rn);

            context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
        }

        public static void Ins_V(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand d = GetVec(op.Rd);
            Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);

            context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
        }

        public static void Movi_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                EmitSse2VectorMoviMvniOp(context, not: false);
            }
            else
            {
                EmitVectorImmUnaryOp(context, (op1) => op1);
            }
        }

        public static void Mvni_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                EmitSse2VectorMoviMvniOp(context, not: true);
            }
            else
            {
                EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
            }
        }

        public static void Smov_S(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                ne = context.ZeroExtend32(OperandType.I64, ne);
            }

            SetIntOrZR(context, op.Rd, ne);
        }

        public static void Tbl_V(ArmEmitterContext context)
        {
            EmitTableVectorLookup(context, isTbl: true);
        }

        public static void Tbx_V(ArmEmitterContext context)
        {
            EmitTableVectorLookup(context, isTbl: false);
        }

        public static void Trn1_V(ArmEmitterContext context)
        {
            EmitVectorTranspose(context, part: 0);
        }

        public static void Trn2_V(ArmEmitterContext context)
        {
            EmitVectorTranspose(context, part: 1);
        }

        public static void Umov_S(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

            SetIntOrZR(context, op.Rd, ne);
        }

        public static void Uzp1_V(ArmEmitterContext context)
        {
            EmitVectorUnzip(context, part: 0);
        }

        public static void Uzp2_V(ArmEmitterContext context)
        {
            EmitVectorUnzip(context, part: 1);
        }
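        // XTN narrows each element to half its width. The SSSE3 path uses a
        // broadcast PSHUFB mask to gather the low halves of the wide elements
        // into both 64-bit lanes, then MOVLHPS (XTN2) or MOVHLPS (XTN) merges
        // the narrowed data into the destination.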
        public static void Xtn_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                Operand d = GetVec(op.Rd);

                Operand res = context.VectorZeroUpper64(d);

                Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);

                Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);

                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
                    ? Intrinsic.X86Movlhps
                    : Intrinsic.X86Movhlps;

                res = context.AddIntrinsic(movInst, res, res2);

                context.Copy(d, res);
            }
            else
            {
                int elems = 8 >> op.Size;

                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

                Operand d = GetVec(op.Rd);

                Operand res = part == 0 ? context.VectorZero() : context.Copy(d);

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);

                    res = EmitVectorInsert(context, res, ne, part + index, op.Size);
                }

                context.Copy(d, res);
            }
        }

        public static void Zip1_V(ArmEmitterContext context)
        {
            EmitVectorZip(context, part: 0);
        }

        public static void Zip2_V(ArmEmitterContext context)
        {
            EmitVectorZip(context, part: 1);
        }

        private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not)
        {
            OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

            long imm = op.Immediate;

            switch (op.Size)
            {
                case 0:
                    imm *= 0x01010101;
                    break;
                case 1:
                    imm *= 0x00010001;
                    break;
            }

            if (not)
            {
                imm = ~imm;
            }

            Operand mask;

            if (op.Size < 3)
            {
                mask = X86GetAllElements(context, (int)imm);
            }
            else
            {
                mask = X86GetAllElements(context, imm);
            }

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                mask = context.VectorZeroUpper64(mask);
            }

            context.Copy(GetVec(op.Rd), mask);
        }
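        // TBL/TBX is emulated with PSHUFB, which zeroes any byte whose index has
        // bit 7 set. Indices beyond a 16-byte table register are forced out of
        // range by OR-ing the index vector with a PCMPGTB mask; each additional
        // table register repeats the lookup with indices rebased by -16 and ORs
        // the results. For TBX, destination bytes are preserved wherever every
        // lookup missed.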
        private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl)
        {
            OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                Operand d = GetVec(op.Rd);
                Operand m = GetVec(op.Rm);

                Operand res;

                Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);

                // Fast path for single register table.
                {
                    Operand n = GetVec(op.Rn);

                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
                    mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);

                    res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
                }

                for (int index = 1; index < op.Size; index++)
                {
                    Operand ni = GetVec((op.Rn + index) & 0x1F);

                    Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index);

                    Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);

                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
                    mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);

                    Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);

                    res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
                }

                if (!isTbl)
                {
                    Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L);
                    Operand zeroMask = context.VectorZero();

                    Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
                    Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);

                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);

                    Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask);

                    res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
                }

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else
            {
                Operand d = GetVec(op.Rd);

                List<Operand> args = new();

                if (!isTbl)
                {
                    args.Add(d);
                }

                args.Add(GetVec(op.Rm));

                args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 8 : 16));

                for (int index = 0; index < op.Size; index++)
                {
                    args.Add(GetVec((op.Rn + index) & 0x1F));
                }

                MethodInfo info = null;

                if (isTbl)
                {
                    switch (op.Size)
                    {
                        case 1:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1));
                            break;
                        case 2:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2));
                            break;
                        case 3:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3));
                            break;
                        case 4:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4));
                            break;
                    }
                }
                else
                {
                    switch (op.Size)
                    {
                        case 1:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1));
                            break;
                        case 2:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2));
                            break;
                        case 3:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3));
                            break;
                        case 4:
                            info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4));
                            break;
                    }
                }

                context.Copy(d, context.Call(info, args.ToArray()));
            }
        }

        private static void EmitVectorTranspose(ArmEmitterContext context, int part)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                Operand mask = default;

                if (op.Size < 3)
                {
                    long maskE0 = EvenMasks[op.Size];
                    long maskE1 = OddMasks[op.Size];

                    mask = X86GetScalar(context, maskE0);

                    mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                }

                Operand n = GetVec(op.Rn);

                if (op.Size < 3)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
                }

                Operand m = GetVec(op.Rm);

                if (op.Size < 3)
                {
                    m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
                }

                Intrinsic punpckInst = part == 0
                    ? X86PunpcklInstruction[op.Size]
                    : X86PunpckhInstruction[op.Size];

                Operand res = context.AddIntrinsic(punpckInst, n, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int pairs = op.GetPairsCount() >> op.Size;

                for (int index = 0; index < pairs; index++)
                {
                    int pairIndex = index << 1;

                    Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
                    Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);

                    res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
                    res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
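        // UZP de-interleaves two vectors. On SSSE3 (full width), both inputs are
        // pre-shuffled so their even elements land in the low 64 bits and their
        // odd elements in the high 64 bits; PUNPCKLQDQ/PUNPCKHQDQ then selects
        // the even (part 0) or odd (part 1) elements from both sources.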
        private static void EmitVectorUnzip(ArmEmitterContext context, int part)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    Operand mask = default;

                    if (op.Size < 3)
                    {
                        long maskE0 = EvenMasks[op.Size];
                        long maskE1 = OddMasks[op.Size];

                        mask = X86GetScalar(context, maskE0);

                        mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                    }

                    Operand n = GetVec(op.Rn);

                    if (op.Size < 3)
                    {
                        n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
                    }

                    Operand m = GetVec(op.Rm);

                    if (op.Size < 3)
                    {
                        m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
                    }

                    Intrinsic punpckInst = part == 0
                        ? Intrinsic.X86Punpcklqdq
                        : Intrinsic.X86Punpckhqdq;

                    Operand res = context.AddIntrinsic(punpckInst, n, m);

                    context.Copy(GetVec(op.Rd), res);
                }
                else
                {
                    Operand n = GetVec(op.Rn);
                    Operand m = GetVec(op.Rm);

                    Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];

                    Operand res = context.AddIntrinsic(punpcklInst, n, m);

                    if (op.Size < 2)
                    {
                        long maskE0 = _masksE0_Uzp[op.Size];
                        long maskE1 = _masksE1_Uzp[op.Size];

                        Operand mask = X86GetScalar(context, maskE0);

                        mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);

                        res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
                    }

                    Intrinsic punpckInst = part == 0
                        ? Intrinsic.X86Punpcklqdq
                        : Intrinsic.X86Punpckhqdq;

                    res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                Operand res = context.VectorZero();

                int pairs = op.GetPairsCount() >> op.Size;

                for (int index = 0; index < pairs; index++)
                {
                    int idx = index << 1;

                    Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
                    Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);

                    res = EmitVectorInsert(context, res, ne, index, op.Size);
                    res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }

        private static void EmitVectorZip(ArmEmitterContext context, int part)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    Intrinsic punpckInst = part == 0
                        ? X86PunpcklInstruction[op.Size]
                        : X86PunpckhInstruction[op.Size];

                    Operand res = context.AddIntrinsic(punpckInst, n, m);

                    context.Copy(GetVec(op.Rd), res);
                }
                else
                {
                    Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);

                    Intrinsic punpckInst = part == 0
                        ? Intrinsic.X86Punpcklqdq
                        : Intrinsic.X86Punpckhqdq;

                    res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                Operand res = context.VectorZero();

                int pairs = op.GetPairsCount() >> op.Size;

                int baseIndex = part != 0 ? pairs : 0;

                for (int index = 0; index < pairs; index++)
                {
                    int pairIndex = index << 1;

                    Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
                    Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

                    res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
                    res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
    }
}