/ src / ARMeilleure / Instructions / InstEmitSimdMove.cs
InstEmitSimdMove.cs
  1  using ARMeilleure.Decoders;
  2  using ARMeilleure.IntermediateRepresentation;
  3  using ARMeilleure.Translation;
  4  using System.Collections.Generic;
  5  using System.Reflection;
  6  using static ARMeilleure.Instructions.InstEmitHelper;
  7  using static ARMeilleure.Instructions.InstEmitSimdHelper;
  8  using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
  9  
 10  namespace ARMeilleure.Instructions
 11  {
 12      static partial class InstEmit
 13      {
 14          #region "Masks"
 15          private static readonly long[] _masksE0_Uzp = new long[]
 16          {
 17              13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0,
 18              11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0,
 19          };
 20  
 21          private static readonly long[] _masksE1_Uzp = new long[]
 22          {
 23              15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0,
 24              15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0,
 25          };
 26          #endregion
 27  
 28          public static void Dup_Gp(ArmEmitterContext context)
 29          {
 30              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
 31  
 32              Operand n = GetIntOrZR(context, op.Rn);
 33  
 34              if (Optimizations.UseSse2)
 35              {
 36                  switch (op.Size)
 37                  {
 38                      case 0:
 39                          n = context.ZeroExtend8(n.Type, n);
 40                          n = context.Multiply(n, Const(n.Type, 0x01010101));
 41                          break;
 42                      case 1:
 43                          n = context.ZeroExtend16(n.Type, n);
 44                          n = context.Multiply(n, Const(n.Type, 0x00010001));
 45                          break;
 46                      case 2:
 47                          n = context.ZeroExtend32(n.Type, n);
 48                          break;
 49                  }
 50  
 51                  Operand res = context.VectorInsert(context.VectorZero(), n, 0);
 52  
 53                  if (op.Size < 3)
 54                  {
 55                      if (op.RegisterSize == RegisterSize.Simd64)
 56                      {
 57                          res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
 58                      }
 59                      else
 60                      {
 61                          res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
 62                      }
 63                  }
 64                  else
 65                  {
 66                      res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
 67                  }
 68  
 69                  context.Copy(GetVec(op.Rd), res);
 70              }
 71              else
 72              {
 73                  Operand res = context.VectorZero();
 74  
 75                  int elems = op.GetBytesCount() >> op.Size;
 76  
 77                  for (int index = 0; index < elems; index++)
 78                  {
 79                      res = EmitVectorInsert(context, res, n, index, op.Size);
 80                  }
 81  
 82                  context.Copy(GetVec(op.Rd), res);
 83              }
 84          }
 85  
 86          public static void Dup_S(ArmEmitterContext context)
 87          {
 88              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
 89  
 90              Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
 91  
 92              context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size));
 93          }
 94  
 95          public static void Dup_V(ArmEmitterContext context)
 96          {
 97              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
 98  
 99              if (Optimizations.UseSse2)
100              {
101                  Operand res = GetVec(op.Rn);
102  
103                  if (op.Size == 0)
104                  {
105                      if (op.DstIndex != 0)
106                      {
107                          res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex));
108                      }
109  
110                      res = context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res);
111                      res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
112                      res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
113                  }
114                  else if (op.Size == 1)
115                  {
116                      if (op.DstIndex != 0)
117                      {
118                          res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2));
119                      }
120  
121                      res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res);
122                      res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
123                  }
124                  else if (op.Size == 2)
125                  {
126                      int mask = op.DstIndex * 0b01010101;
127  
128                      res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask));
129                  }
130                  else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64)
131                  {
132                      res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
133                  }
134                  else if (op.DstIndex == 1)
135                  {
136                      res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res);
137                  }
138  
139                  if (op.RegisterSize == RegisterSize.Simd64)
140                  {
141                      res = context.VectorZeroUpper64(res);
142                  }
143  
144                  context.Copy(GetVec(op.Rd), res);
145              }
146              else
147              {
148                  Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
149  
150                  Operand res = context.VectorZero();
151  
152                  int elems = op.GetBytesCount() >> op.Size;
153  
154                  for (int index = 0; index < elems; index++)
155                  {
156                      res = EmitVectorInsert(context, res, ne, index, op.Size);
157                  }
158  
159                  context.Copy(GetVec(op.Rd), res);
160              }
161          }
162  
163          public static void Ext_V(ArmEmitterContext context)
164          {
165              OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;
166  
167              if (Optimizations.UseSse2)
168              {
169                  Operand nShifted = GetVec(op.Rn);
170  
171                  if (op.RegisterSize == RegisterSize.Simd64)
172                  {
173                      nShifted = context.VectorZeroUpper64(nShifted);
174                  }
175  
176                  nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));
177  
178                  Operand mShifted = GetVec(op.Rm);
179  
180                  mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));
181  
182                  if (op.RegisterSize == RegisterSize.Simd64)
183                  {
184                      mShifted = context.VectorZeroUpper64(mShifted);
185                  }
186  
187                  Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);
188  
189                  context.Copy(GetVec(op.Rd), res);
190              }
191              else
192              {
193                  Operand res = context.VectorZero();
194  
195                  int bytes = op.GetBytesCount();
196  
197                  int position = op.Imm4 & (bytes - 1);
198  
199                  for (int index = 0; index < bytes; index++)
200                  {
201                      int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;
202  
203                      Operand e = EmitVectorExtractZx(context, reg, position, 0);
204  
205                      position = (position + 1) & (bytes - 1);
206  
207                      res = EmitVectorInsert(context, res, e, index, 0);
208                  }
209  
210                  context.Copy(GetVec(op.Rd), res);
211              }
212          }
213  
214          public static void Fcsel_S(ArmEmitterContext context)
215          {
216              OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
217  
218              Operand lblTrue = Label();
219              Operand lblEnd = Label();
220  
221              Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond);
222  
223              context.BranchIfTrue(lblTrue, isTrue);
224  
225              OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
226  
227              Operand me = context.VectorExtract(type, GetVec(op.Rm), 0);
228  
229              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0));
230  
231              context.Branch(lblEnd);
232  
233              context.MarkLabel(lblTrue);
234  
235              Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
236  
237              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
238  
239              context.MarkLabel(lblEnd);
240          }
241  
242          public static void Fmov_Ftoi(ArmEmitterContext context)
243          {
244              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
245  
246              Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);
247  
248              SetIntOrZR(context, op.Rd, ne);
249          }
250  
251          public static void Fmov_Ftoi1(ArmEmitterContext context)
252          {
253              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
254  
255              Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3);
256  
257              SetIntOrZR(context, op.Rd, ne);
258          }
259  
260          public static void Fmov_Itof(ArmEmitterContext context)
261          {
262              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
263  
264              Operand n = GetIntOrZR(context, op.Rn);
265  
266              context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2));
267          }
268  
269          public static void Fmov_Itof1(ArmEmitterContext context)
270          {
271              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
272  
273              Operand d = GetVec(op.Rd);
274              Operand n = GetIntOrZR(context, op.Rn);
275  
276              context.Copy(d, EmitVectorInsert(context, d, n, 1, 3));
277          }
278  
279          public static void Fmov_S(ArmEmitterContext context)
280          {
281              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
282  
283              OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
284  
285              Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
286  
287              context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0));
288          }
289  
290          public static void Fmov_Si(ArmEmitterContext context)
291          {
292              OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp;
293  
294              if (Optimizations.UseSse2)
295              {
296                  if (op.Size == 0)
297                  {
298                      context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate));
299                  }
300                  else
301                  {
302                      context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
303                  }
304              }
305              else
306              {
307                  Operand e = Const(op.Immediate);
308  
309                  Operand res = context.VectorZero();
310  
311                  res = EmitVectorInsert(context, res, e, 0, op.Size + 2);
312  
313                  context.Copy(GetVec(op.Rd), res);
314              }
315          }
316  
317          public static void Fmov_Vi(ArmEmitterContext context)
318          {
319              OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
320  
321              if (Optimizations.UseSse2)
322              {
323                  if (op.RegisterSize == RegisterSize.Simd128)
324                  {
325                      context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate));
326                  }
327                  else
328                  {
329                      context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate));
330                  }
331              }
332              else
333              {
334                  Operand e = Const(op.Immediate);
335  
336                  Operand res = context.VectorZero();
337  
338                  int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;
339  
340                  for (int index = 0; index < elems; index++)
341                  {
342                      res = EmitVectorInsert(context, res, e, index, 3);
343                  }
344  
345                  context.Copy(GetVec(op.Rd), res);
346              }
347          }
348  
349          public static void Ins_Gp(ArmEmitterContext context)
350          {
351              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
352  
353              Operand d = GetVec(op.Rd);
354              Operand n = GetIntOrZR(context, op.Rn);
355  
356              context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size));
357          }
358  
359          public static void Ins_V(ArmEmitterContext context)
360          {
361              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
362  
363              Operand d = GetVec(op.Rd);
364              Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size);
365  
366              context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size));
367          }
368  
369          public static void Movi_V(ArmEmitterContext context)
370          {
371              if (Optimizations.UseSse2)
372              {
373                  EmitSse2VectorMoviMvniOp(context, not: false);
374              }
375              else
376              {
377                  EmitVectorImmUnaryOp(context, (op1) => op1);
378              }
379          }
380  
381          public static void Mvni_V(ArmEmitterContext context)
382          {
383              if (Optimizations.UseSse2)
384              {
385                  EmitSse2VectorMoviMvniOp(context, not: true);
386              }
387              else
388              {
389                  EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1));
390              }
391          }
392  
393          public static void Smov_S(ArmEmitterContext context)
394          {
395              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
396  
397              Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
398  
399              if (op.RegisterSize == RegisterSize.Simd64)
400              {
401                  ne = context.ZeroExtend32(OperandType.I64, ne);
402              }
403  
404              SetIntOrZR(context, op.Rd, ne);
405          }
406  
407          public static void Tbl_V(ArmEmitterContext context)
408          {
409              EmitTableVectorLookup(context, isTbl: true);
410          }
411  
412          public static void Tbx_V(ArmEmitterContext context)
413          {
414              EmitTableVectorLookup(context, isTbl: false);
415          }
416  
417          public static void Trn1_V(ArmEmitterContext context)
418          {
419              EmitVectorTranspose(context, part: 0);
420          }
421  
422          public static void Trn2_V(ArmEmitterContext context)
423          {
424              EmitVectorTranspose(context, part: 1);
425          }
426  
427          public static void Umov_S(ArmEmitterContext context)
428          {
429              OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;
430  
431              Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
432  
433              SetIntOrZR(context, op.Rd, ne);
434          }
435  
436          public static void Uzp1_V(ArmEmitterContext context)
437          {
438              EmitVectorUnzip(context, part: 0);
439          }
440  
441          public static void Uzp2_V(ArmEmitterContext context)
442          {
443              EmitVectorUnzip(context, part: 1);
444          }
445  
446          public static void Xtn_V(ArmEmitterContext context)
447          {
448              OpCodeSimd op = (OpCodeSimd)context.CurrOp;
449  
450              if (Optimizations.UseSsse3)
451              {
452                  Operand d = GetVec(op.Rd);
453  
454                  Operand res = context.VectorZeroUpper64(d);
455  
456                  Operand mask = X86GetAllElements(context, EvenMasks[op.Size]);
457  
458                  Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask);
459  
460                  Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
461                      ? Intrinsic.X86Movlhps
462                      : Intrinsic.X86Movhlps;
463  
464                  res = context.AddIntrinsic(movInst, res, res2);
465  
466                  context.Copy(d, res);
467              }
468              else
469              {
470                  int elems = 8 >> op.Size;
471  
472                  int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
473  
474                  Operand d = GetVec(op.Rd);
475  
476                  Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
477  
478                  for (int index = 0; index < elems; index++)
479                  {
480                      Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1);
481  
482                      res = EmitVectorInsert(context, res, ne, part + index, op.Size);
483                  }
484  
485                  context.Copy(d, res);
486              }
487          }
488  
489          public static void Zip1_V(ArmEmitterContext context)
490          {
491              EmitVectorZip(context, part: 0);
492          }
493  
494          public static void Zip2_V(ArmEmitterContext context)
495          {
496              EmitVectorZip(context, part: 1);
497          }
498  
499          private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not)
500          {
501              OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;
502  
503              long imm = op.Immediate;
504  
505              switch (op.Size)
506              {
507                  case 0:
508                      imm *= 0x01010101;
509                      break;
510                  case 1:
511                      imm *= 0x00010001;
512                      break;
513              }
514  
515              if (not)
516              {
517                  imm = ~imm;
518              }
519  
520              Operand mask;
521  
522              if (op.Size < 3)
523              {
524                  mask = X86GetAllElements(context, (int)imm);
525              }
526              else
527              {
528                  mask = X86GetAllElements(context, imm);
529              }
530  
531              if (op.RegisterSize == RegisterSize.Simd64)
532              {
533                  mask = context.VectorZeroUpper64(mask);
534              }
535  
536              context.Copy(GetVec(op.Rd), mask);
537          }
538  
539          private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl)
540          {
541              OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
542  
543              if (Optimizations.UseSsse3)
544              {
545                  Operand d = GetVec(op.Rd);
546                  Operand m = GetVec(op.Rm);
547  
548                  Operand res;
549  
550                  Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL);
551  
552                  // Fast path for single register table.
553                  {
554                      Operand n = GetVec(op.Rn);
555  
556                      Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask);
557                      mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m);
558  
559                      res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask);
560                  }
561  
562                  for (int index = 1; index < op.Size; index++)
563                  {
564                      Operand ni = GetVec((op.Rn + index) & 0x1F);
565  
566                      Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index);
567  
568                      Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask);
569  
570                      Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask);
571                      mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask);
572  
573                      Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask);
574  
575                      res = context.AddIntrinsic(Intrinsic.X86Por, res, res2);
576                  }
577  
578                  if (!isTbl)
579                  {
580                      Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L);
581                      Operand zeroMask = context.VectorZero();
582  
583                      Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask);
584                      Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m);
585  
586                      Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask);
587  
588                      Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask);
589  
590                      res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask);
591                  }
592  
593                  if (op.RegisterSize == RegisterSize.Simd64)
594                  {
595                      res = context.VectorZeroUpper64(res);
596                  }
597  
598                  context.Copy(d, res);
599              }
600              else
601              {
602                  Operand d = GetVec(op.Rd);
603  
604                  List<Operand> args = new();
605  
606                  if (!isTbl)
607                  {
608                      args.Add(d);
609                  }
610  
611                  args.Add(GetVec(op.Rm));
612  
613                  args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 8 : 16));
614  
615                  for (int index = 0; index < op.Size; index++)
616                  {
617                      args.Add(GetVec((op.Rn + index) & 0x1F));
618                  }
619  
620                  MethodInfo info = null;
621  
622                  if (isTbl)
623                  {
624                      switch (op.Size)
625                      {
626                          case 1:
627                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1));
628                              break;
629                          case 2:
630                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2));
631                              break;
632                          case 3:
633                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3));
634                              break;
635                          case 4:
636                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4));
637                              break;
638                      }
639                  }
640                  else
641                  {
642                      switch (op.Size)
643                      {
644                          case 1:
645                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1));
646                              break;
647                          case 2:
648                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2));
649                              break;
650                          case 3:
651                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3));
652                              break;
653                          case 4:
654                              info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4));
655                              break;
656                      }
657                  }
658  
659                  context.Copy(d, context.Call(info, args.ToArray()));
660              }
661          }
662  
663          private static void EmitVectorTranspose(ArmEmitterContext context, int part)
664          {
665              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
666  
667              if (Optimizations.UseSsse3)
668              {
669                  Operand mask = default;
670  
671                  if (op.Size < 3)
672                  {
673                      long maskE0 = EvenMasks[op.Size];
674                      long maskE1 = OddMasks[op.Size];
675  
676                      mask = X86GetScalar(context, maskE0);
677  
678                      mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
679                  }
680  
681                  Operand n = GetVec(op.Rn);
682  
683                  if (op.Size < 3)
684                  {
685                      n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
686                  }
687  
688                  Operand m = GetVec(op.Rm);
689  
690                  if (op.Size < 3)
691                  {
692                      m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
693                  }
694  
695                  Intrinsic punpckInst = part == 0
696                      ? X86PunpcklInstruction[op.Size]
697                      : X86PunpckhInstruction[op.Size];
698  
699                  Operand res = context.AddIntrinsic(punpckInst, n, m);
700  
701                  if (op.RegisterSize == RegisterSize.Simd64)
702                  {
703                      res = context.VectorZeroUpper64(res);
704                  }
705  
706                  context.Copy(GetVec(op.Rd), res);
707              }
708              else
709              {
710                  Operand res = context.VectorZero();
711  
712                  int pairs = op.GetPairsCount() >> op.Size;
713  
714                  for (int index = 0; index < pairs; index++)
715                  {
716                      int pairIndex = index << 1;
717  
718                      Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size);
719                      Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size);
720  
721                      res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
722                      res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
723                  }
724  
725                  context.Copy(GetVec(op.Rd), res);
726              }
727          }
728  
729          private static void EmitVectorUnzip(ArmEmitterContext context, int part)
730          {
731              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
732  
733              if (Optimizations.UseSsse3)
734              {
735                  if (op.RegisterSize == RegisterSize.Simd128)
736                  {
737                      Operand mask = default;
738  
739                      if (op.Size < 3)
740                      {
741                          long maskE0 = EvenMasks[op.Size];
742                          long maskE1 = OddMasks[op.Size];
743  
744                          mask = X86GetScalar(context, maskE0);
745  
746                          mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
747                      }
748  
749                      Operand n = GetVec(op.Rn);
750  
751                      if (op.Size < 3)
752                      {
753                          n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);
754                      }
755  
756                      Operand m = GetVec(op.Rm);
757  
758                      if (op.Size < 3)
759                      {
760                          m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask);
761                      }
762  
763                      Intrinsic punpckInst = part == 0
764                          ? Intrinsic.X86Punpcklqdq
765                          : Intrinsic.X86Punpckhqdq;
766  
767                      Operand res = context.AddIntrinsic(punpckInst, n, m);
768  
769                      context.Copy(GetVec(op.Rd), res);
770                  }
771                  else
772                  {
773                      Operand n = GetVec(op.Rn);
774                      Operand m = GetVec(op.Rm);
775  
776                      Intrinsic punpcklInst = X86PunpcklInstruction[op.Size];
777  
778                      Operand res = context.AddIntrinsic(punpcklInst, n, m);
779  
780                      if (op.Size < 2)
781                      {
782                          long maskE0 = _masksE0_Uzp[op.Size];
783                          long maskE1 = _masksE1_Uzp[op.Size];
784  
785                          Operand mask = X86GetScalar(context, maskE0);
786  
787                          mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
788  
789                          res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask);
790                      }
791  
792                      Intrinsic punpckInst = part == 0
793                          ? Intrinsic.X86Punpcklqdq
794                          : Intrinsic.X86Punpckhqdq;
795  
796                      res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
797  
798                      context.Copy(GetVec(op.Rd), res);
799                  }
800              }
801              else
802              {
803                  Operand res = context.VectorZero();
804  
805                  int pairs = op.GetPairsCount() >> op.Size;
806  
807                  for (int index = 0; index < pairs; index++)
808                  {
809                      int idx = index << 1;
810  
811                      Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size);
812                      Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size);
813  
814                      res = EmitVectorInsert(context, res, ne, index, op.Size);
815                      res = EmitVectorInsert(context, res, me, pairs + index, op.Size);
816                  }
817  
818                  context.Copy(GetVec(op.Rd), res);
819              }
820          }
821  
822          private static void EmitVectorZip(ArmEmitterContext context, int part)
823          {
824              OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
825  
826              if (Optimizations.UseSse2)
827              {
828                  Operand n = GetVec(op.Rn);
829                  Operand m = GetVec(op.Rm);
830  
831                  if (op.RegisterSize == RegisterSize.Simd128)
832                  {
833                      Intrinsic punpckInst = part == 0
834                          ? X86PunpcklInstruction[op.Size]
835                          : X86PunpckhInstruction[op.Size];
836  
837                      Operand res = context.AddIntrinsic(punpckInst, n, m);
838  
839                      context.Copy(GetVec(op.Rd), res);
840                  }
841                  else
842                  {
843                      Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);
844  
845                      Intrinsic punpckInst = part == 0
846                          ? Intrinsic.X86Punpcklqdq
847                          : Intrinsic.X86Punpckhqdq;
848  
849                      res = context.AddIntrinsic(punpckInst, res, context.VectorZero());
850  
851                      context.Copy(GetVec(op.Rd), res);
852                  }
853              }
854              else
855              {
856                  Operand res = context.VectorZero();
857  
858                  int pairs = op.GetPairsCount() >> op.Size;
859  
860                  int baseIndex = part != 0 ? pairs : 0;
861  
862                  for (int index = 0; index < pairs; index++)
863                  {
864                      int pairIndex = index << 1;
865  
866                      Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
867                      Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);
868  
869                      res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
870                      res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
871                  }
872  
873                  context.Copy(GetVec(op.Rd), res);
874              }
875          }
876      }
877  }