// InstEmitSimdMove32.cs
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.Instructions
{
    // AArch32 Advanced SIMD move/permute instruction emitters (VMOV, VMVN,
    // VMOVL, VSWP, VTBL, VTRN, VZIP, VUZP). Each emitter picks the fastest
    // available host path (AVX-512 / SSE / ARM64 AdvSimd intrinsics) and falls
    // back to a scalar element-by-element IR sequence otherwise.
    static partial class InstEmit32
    {
        #region "Masks"
        // Same as InstEmitSimdMove, as the instructions do the same thing.
        // PSHUFB byte-index masks used by the 128-bit VUZP SSSE3 path; one
        // entry per element size (index 0 = bytes, index 1 = halfwords).
        private static readonly long[] _masksE0_Uzp = new long[]
        {
            13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
            11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0,
        };

        private static readonly long[] _masksE1_Uzp = new long[]
        {
            15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
            15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0,
        };
        #endregion

        // VMOV (immediate): writes the decoded immediate into the destination
        // vector unchanged (identity op over the expanded immediate).
        public static void Vmov_I(ArmEmitterContext context)
        {
            EmitVectorImmUnaryOp32(context, (op1) => op1);
        }

        // VMVN (register): bitwise NOT of a vector.
        public static void Vmvn_I(ArmEmitterContext context)
        {
            if (Optimizations.UseAvx512Ortho)
            {
                EmitVectorUnaryOpSimd32(context, (op1) =>
                {
                    // VPTERNLOGD with truth table 0b01010101 computes ~A.
                    return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
                });
            }
            else if (Optimizations.UseSse2)
            {
                EmitVectorUnaryOpSimd32(context, (op1) =>
                {
                    // PANDN computes (~op1 & mask); with an all-ones mask this
                    // is a plain bitwise NOT.
                    Operand mask = X86GetAllElements(context, -1L);
                    return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask);
                });
            }
            else
            {
                EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1));
            }
        }

        // VMVN (immediate): writes the bitwise NOT of the expanded immediate.
        public static void Vmvn_II(ArmEmitterContext context)
        {
            EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1));
        }

        // VMOV between a general purpose register and a single-precision
        // register. The S register index is split into its containing
        // quadword (Vn >> 2) and 32-bit lane within it (Vn & 3).
        public static void Vmov_GS(ArmEmitterContext context)
        {
            OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp;

            Operand vec = GetVecA32(op.Vn >> 2);
            if (op.Op == 1)
            {
                // To general purpose.
                Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3);
                SetIntA32(context, op.Rt, value);
            }
            else
            {
                // From general purpose.
                Operand value = GetIntA32(context, op.Rt);
                context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3));
            }
        }

        // VMOV between a general purpose register and a single vector element
        // (scalar). The element index combines the opcode index with the odd
        // half of the D register, scaled by the element size.
        public static void Vmov_G1(ArmEmitterContext context)
        {
            OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp;

            int index = op.Index + ((op.Vd & 1) << (3 - op.Size));
            if (op.Op == 1)
            {
                // To general purpose.
                Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U);
                SetIntA32(context, op.Rt, value);
            }
            else
            {
                // From general purpose.
                Operand vec = GetVecA32(op.Vd >> 1);
                Operand value = GetIntA32(context, op.Rt);
                context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size));
            }
        }

        // VMOV between two general purpose registers (Rt, Rt2) and two
        // consecutive single-precision registers (Sm, Sm+1). The two S
        // registers may live in the same host quadword or in adjacent ones;
        // sameOwnerVec distinguishes the two cases so inserts don't clobber
        // each other.
        public static void Vmov_G2(ArmEmitterContext context)
        {
            OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;

            Operand vec = GetVecA32(op.Vm >> 2);
            int vm1 = op.Vm + 1;
            bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2);
            Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2);
            if (op.Op == 1)
            {
                // To general purpose.
                Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3);
                SetIntA32(context, op.Rt, lowValue);

                Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3);
                SetIntA32(context, op.Rt2, highValue);
            }
            else
            {
                // From general purpose.
                Operand lowValue = GetIntA32(context, op.Rt);
                Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3);

                Operand highValue = GetIntA32(context, op.Rt2);

                if (sameOwnerVec)
                {
                    context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3));
                }
                else
                {
                    context.Copy(vec, resultVec);
                    context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3));
                }
            }
        }

        // VMOV between two general purpose registers (Rt, Rt2) and one
        // doubleword register: the 64-bit lane (Vm & 1) of quadword Vm >> 1.
        public static void Vmov_GD(ArmEmitterContext context)
        {
            OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;

            Operand vec = GetVecA32(op.Vm >> 1);
            if (op.Op == 1)
            {
                // To general purpose: low word to Rt, high word to Rt2.
                Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1);
                SetIntA32(context, op.Rt, context.ConvertI64ToI32(value));
                SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32))));
            }
            else
            {
                // From general purpose: pack Rt (low) and Rt2 (high) into I64.
                Operand lowValue = GetIntA32(context, op.Rt);
                Operand highValue = GetIntA32(context, op.Rt2);

                Operand value = context.BitwiseOr(
                    context.ZeroExtend32(OperandType.I64, lowValue),
                    context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32)));

                context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1));
            }
        }

        // VMOVL: lengthen — widens each source element to twice its size
        // (sign- or zero-extended depending on op.U), writing size + 1
        // elements into the destination.
        public static void Vmovl(ArmEmitterContext context)
        {
            OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;

            Operand res = context.VectorZero();

            int elems = op.GetBytesCount() >> op.Size;

            for (int index = 0; index < elems; index++)
            {
                Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U);

                if (op.Size == 2)
                {
                    // 32-bit source elements come out as I32; extend them to
                    // I64 explicitly before the 64-bit insert.
                    if (op.U)
                    {
                        me = context.ZeroExtend32(OperandType.I64, me);
                    }
                    else
                    {
                        me = context.SignExtend32(OperandType.I64, me);
                    }
                }

                res = EmitVectorInsert(context, res, me, index, op.Size + 1);
            }

            context.Copy(GetVecA32(op.Qd), res);
        }

        // VSWP: swaps two registers — whole quadwords when Q is set,
        // otherwise the two 64-bit doublewords via scalar extract/insert.
        public static void Vswp(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            if (op.Q)
            {
                Operand temp = context.Copy(GetVecA32(op.Qd));

                context.Copy(GetVecA32(op.Qd), GetVecA32(op.Qm));
                context.Copy(GetVecA32(op.Qm), temp);
            }
            else
            {
                Operand temp = ExtractScalar(context, OperandType.I64, op.Vd);

                InsertScalar(context, op.Vd, ExtractScalar(context, OperandType.I64, op.Vm));
                InsertScalar(context, op.Vm, temp);
            }
        }

        // VTBL/VTBX: table lookup of bytes from a list of 1-4 consecutive D
        // registers, indexed by the bytes of Dm. VTBX (extension == true)
        // leaves out-of-range destination bytes unchanged; VTBL zeroes them.
        public static void Vtbl(ArmEmitterContext context)
        {
            OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

            bool extension = op.Opc == 1;
            int length = op.Length + 1;

            if (Optimizations.UseSsse3)
            {
                Operand d = GetVecA32(op.Qd);
                Operand m = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qm), op.Vm, 0);

                Operand res;
                // Indices above 7 are out of range for a single D register;
                // comparing against this mask flags them.
                Operand mask = X86GetAllElements(context, 0x0707070707070707L);

                // Fast path for single register table.
                {
                    Operand n = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qn), op.Vn, 0);

                    // Set the MSB of out-of-range index bytes so PSHUFB
                    // produces zero for them.
                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
                    mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);

                    res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
                }

                for (int index = 1; index < length; index++)
                {
                    // Table registers wrap around the 32-entry D register file.
                    int newVn = (op.Vn + index) & 0x1F;
                    (int qn, _) = GetQuadwordAndSubindex(newVn, op.RegisterSize);
                    Operand ni = EmitMoveDoubleWordToSide(context, GetVecA32(qn), newVn, 0);

                    // Rebase the indices so this table register covers 0-7.
                    Operand idxMask = X86GetAllElements(context, 0x0808080808080808L * index);

                    Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);

                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
                    mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);

                    Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);

                    res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
                }

                if (extension)
                {
                    // VTBX: merge the original destination bytes back in
                    // wherever the index was out of range (above the table
                    // size, or negative when viewed as signed).
                    Operand idxMask = X86GetAllElements(context, (0x0808080808080808L * length) - 0x0101010101010101L);
                    Operand zeroMask = context.VectorZero();

                    Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
                    Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);

                    Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);

                    Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, EmitMoveDoubleWordToSide(context, d, op.Vd, 0), mMask);

                    res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
                }

                res = EmitMoveDoubleWordToSide(context, res, 0, op.Vd);

                context.Copy(d, EmitDoubleWordInsert(context, d, res, op.Vd));
            }
            else
            {
                int elems = op.GetBytesCount() >> op.Size;

                // Pre-resolve each table D register to its (quadword, half)
                // location in the host register file.
                (int Qx, int Ix)[] tableTuples = new (int, int)[length];
                for (int i = 0; i < length; i++)
                {
                    tableTuples[i] = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize);
                }

                int byteLength = length * 8;

                Operand res = GetVecA32(op.Qd);
                Operand m = GetVecA32(op.Qm);

                for (int index = 0; index < elems; index++)
                {
                    Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im));

                    Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength));
                    Operand elemRes = default; // Note: This is I64 for ease of calculation.

                    // TODO: Branching rather than conditional select.

                    // Get indexed byte.
                    // To simplify (ha) the il, we get bytes from every vector and use a nested conditional select to choose the right result.
                    // This does have to extract `length` times for every element but certainly not as bad as it could be.

                    // Which vector number is the index on.
                    Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3));
                    // What should we shift by to extract it.
                    Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3));

                    for (int i = 0; i < length; i++)
                    {
                        (int qx, int ix) = tableTuples[i];
                        // Get the whole vector, we'll get a byte out of it.
                        Operand lookupResult;
                        if (qx == op.Qd)
                        {
                            // Result contains the current state of the vector.
                            lookupResult = context.VectorExtract(OperandType.I64, res, ix);
                        }
                        else
                        {
                            lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64
                        }

                        lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector.

                        if (i == 0)
                        {
                            elemRes = lookupResult; // First result is always default.
                        }
                        else
                        {
                            Operand isThisElem = context.ICompareEqual(vecIndex, Const(i));
                            elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes);
                        }
                    }

                    // Out-of-range indices keep the old byte (VTBX) or get 0 (VTBL).
                    Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L);

                    res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0);
                }

                context.Copy(GetVecA32(op.Qd), res);
            }
        }

        // VTRN: transposes element pairs between two vectors — Dd receives
        // the even elements of Dd interleaved with the even elements of Dm,
        // Dm the odd ones.
        public static void Vtrn(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitVectorShuffleOpSimd32(context, (m, d) =>
                {
                    Operand mask = default;

                    if (op.Size < 3)
                    {
                        // Pre-shuffle so that the even/odd element pairs line
                        // up for the unpack instructions below.
                        long maskE0 = EvenMasks[op.Size];
                        long maskE1 = OddMasks[op.Size];

                        mask = X86GetScalar(context, maskE0);

                        mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
                    }

                    if (op.Size < 3)
                    {
                        d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
                        m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
                    }

                    Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
                    Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);

                    return (resM, resD);
                });
            }
            else
            {
                int elems = op.GetBytesCount() >> op.Size;
                int pairs = elems >> 1;

                // When source and destination are the same quadword, the two
                // result accumulators must alias so later inserts see earlier
                // ones.
                bool overlap = op.Qm == op.Qd;

                Operand resD = GetVecA32(op.Qd);
                Operand resM = GetVecA32(op.Qm);

                for (int index = 0; index < pairs; index++)
                {
                    int pairIndex = index << 1;
                    Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false);
                    Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false);

                    resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size);

                    if (overlap)
                    {
                        resM = resD;
                    }

                    resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size);

                    if (overlap)
                    {
                        resD = resM;
                    }
                }

                context.Copy(GetVecA32(op.Qd), resD);
                if (!overlap)
                {
                    context.Copy(GetVecA32(op.Qm), resM);
                }
            }
        }

        // VZIP: interleaves the elements of two vectors — Dd gets the zipped
        // lower halves, Dm the zipped upper halves.
        public static void Vzip(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (Optimizations.UseAdvSimd)
            {
                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
            }
            else if (Optimizations.UseSse2)
            {
                EmitVectorShuffleOpSimd32(context, (m, d) =>
                {
                    if (op.RegisterSize == RegisterSize.Simd128)
                    {
                        Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);
                        Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m);

                        return (resM, resD);
                    }
                    else
                    {
                        // 64-bit form: zip the doublewords, then split the
                        // 128-bit result back into two doubleword results.
                        Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m);

                        Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
                        Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());
                        return (resM, resD);
                    }
                });
            }
            else
            {
                int elems = op.GetBytesCount() >> op.Size;
                int pairs = elems >> 1;

                // See Vtrn: aliasing keeps in-place semantics when Qm == Qd.
                bool overlap = op.Qm == op.Qd;

                Operand resD = GetVecA32(op.Qd);
                Operand resM = GetVecA32(op.Qm);

                for (int index = 0; index < pairs; index++)
                {
                    int pairIndex = index << 1;
                    Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false);
                    Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false);

                    Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false);
                    Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, false);

                    resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size);
                    resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size);

                    if (overlap)
                    {
                        resM = resD;
                    }

                    resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size);
                    resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size);

                    if (overlap)
                    {
                        resD = resM;
                    }
                }

                context.Copy(GetVecA32(op.Qd), resD);
                if (!overlap)
                {
                    context.Copy(GetVecA32(op.Qm), resM);
                }
            }
        }

        // VUZP: de-interleaves two vectors — Dd gets the even-indexed
        // elements of the Dd:Dm pair, Dm the odd-indexed ones.
        public static void Vuzp(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (Optimizations.UseAdvSimd)
            {
                EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
            }
            else if (Optimizations.UseSsse3)
            {
                EmitVectorShuffleOpSimd32(context, (m, d) =>
                {
                    if (op.RegisterSize == RegisterSize.Simd128)
                    {
                        Operand mask = default;

                        if (op.Size < 3)
                        {
                            // Gather even elements into the low half and odd
                            // elements into the high half of each register.
                            long maskE0 = EvenMasks[op.Size];
                            long maskE1 = OddMasks[op.Size];

                            mask = X86GetScalar(context, maskE0);
                            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);

                            d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask);
                            m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
                        }

                        Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, d, m);
                        Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, d, m);

                        return (resM, resD);
                    }
                    else
                    {
                        Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];

                        Operand res = context.AddIntrinsic(punpcklInst, d, m);

                        if (op.Size < 2)
                        {
                            // Reorder the interleaved result into unzipped
                            // order using the UZP-specific byte masks.
                            long maskE0 = _masksE0_Uzp[op.Size];
                            long maskE1 = _masksE1_Uzp[op.Size];

                            Operand mask = X86GetScalar(context, maskE0);

                            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);

                            res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
                        }

                        Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero());
                        Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero());

                        return (resM, resD);
                    }
                });
            }
            else
            {
                int elems = op.GetBytesCount() >> op.Size;
                int pairs = elems >> 1;

                // See Vtrn: aliasing keeps in-place semantics when Qm == Qd.
                bool overlap = op.Qm == op.Qd;

                Operand resD = GetVecA32(op.Qd);
                Operand resM = GetVecA32(op.Qm);

                for (int index = 0; index < elems; index++)
                {
                    Operand dIns, mIns;
                    if (index >= pairs)
                    {
                        // Second half of the results comes from Dm.
                        int pairIndex = index - pairs;
                        dIns = EmitVectorExtract32(context, op.Qm, (pairIndex << 1) + op.Im, op.Size, false);
                        mIns = EmitVectorExtract32(context, op.Qm, ((pairIndex << 1) | 1) + op.Im, op.Size, false);
                    }
                    else
                    {
                        dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false);
                        mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false);
                    }

                    resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size);

                    if (overlap)
                    {
                        resM = resD;
                    }

                    resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size);

                    if (overlap)
                    {
                        resD = resM;
                    }
                }

                context.Copy(GetVecA32(op.Qd), resD);
                if (!overlap)
                {
                    context.Copy(GetVecA32(op.Qm), resM);
                }
            }
        }

        // Shared VZIP/VUZP emitter for the ARM64 AdvSimd backend: inst1
        // produces the destination-side result, inst2 the source-side result
        // (e.g. ZIP1/ZIP2 or UZP1/UZP2).
        private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            bool overlap = op.Qm == op.Qd;

            Operand d = GetVecA32(op.Qd);
            Operand m = GetVecA32(op.Qm);

            Operand dPart = d;
            Operand mPart = m;

            if (!op.Q) // Register swap: move relevant doubleword to destination side.
            {
                dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
                mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
            }

            Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;

            vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);

            Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
            Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);

            if (!op.Q) // Register insert.
            {
                resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));

                if (overlap)
                {
                    resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
                }
                else
                {
                    resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
                }
            }

            context.Copy(d, resD);
            if (!overlap)
            {
                context.Copy(m, resM);
            }
        }

        // Shared x86 shuffle emitter: normalizes 64-bit operands to side 0,
        // runs shuffleFunc (which returns the (m, d) results), then inserts
        // the doubleword results back into their home registers, handling
        // the Qm == Qd overlap case.
        private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            Operand m = GetVecA32(op.Qm);
            Operand d = GetVecA32(op.Qd);
            Operand initialM = m;
            Operand initialD = d;

            if (!op.Q) // Register swap: move relevant doubleword to side 0, for consistency.
            {
                m = EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
                d = EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
            }

            (Operand resM, Operand resD) = shuffleFunc(m, d);

            bool overlap = op.Qm == op.Qd;

            if (!op.Q) // Register insert.
            {
                resM = EmitDoubleWordInsert(context, initialM, EmitMoveDoubleWordToSide(context, resM, 0, op.Vm), op.Vm);
                resD = EmitDoubleWordInsert(context, overlap ? resM : initialD, EmitMoveDoubleWordToSide(context, resD, 0, op.Vd), op.Vd);
            }

            if (!overlap)
            {
                context.Copy(initialM, resM);
            }

            context.Copy(initialD, resD);
        }
    }
}