/ src / Ryujinx.Graphics.Shader / Instructions / InstEmitMemory.cs
InstEmitMemory.cs
  1  using Ryujinx.Graphics.Shader.Decoders;
  2  using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3  using Ryujinx.Graphics.Shader.Translation;
  4  using System.Numerics;
  5  using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
  6  using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  7  
  8  namespace Ryujinx.Graphics.Shader.Instructions
  9  {
 10      static partial class InstEmit
 11      {
 12          public static void Atom(EmitterContext context)
 13          {
 14              InstAtom op = context.GetOp<InstAtom>();
 15  
 16              int sOffset = (op.Imm20 << 12) >> 12;
 17  
 18              (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);
 19  
 20              Operand value = GetSrcReg(context, op.SrcB);
 21  
 22              Operand res = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, value);
 23  
 24              context.Copy(GetDest(op.Dest), res);
 25          }
 26  
 27          public static void Atoms(EmitterContext context)
 28          {
 29              if (context.TranslatorContext.Definitions.Stage != ShaderStage.Compute)
 30              {
 31                  context.TranslatorContext.GpuAccessor.Log($"Atoms instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
 32                  return;
 33              }
 34  
 35              InstAtoms op = context.GetOp<InstAtoms>();
 36  
 37              Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));
 38  
 39              int sOffset = (op.Imm22 << 10) >> 10;
 40  
 41              offset = context.IAdd(offset, Const(sOffset));
 42  
 43              Operand value = GetSrcReg(context, op.SrcB);
 44  
 45              AtomSize size = op.AtomsSize switch
 46              {
 47                  AtomsSize.S32 => AtomSize.S32,
 48                  AtomsSize.U64 => AtomSize.U64,
 49                  AtomsSize.S64 => AtomSize.S64,
 50                  _ => AtomSize.U32,
 51              };
 52  
 53              Operand id = Const(context.ResourceManager.SharedMemoryId);
 54              Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, id, offset, value);
 55  
 56              context.Copy(GetDest(op.Dest), res);
 57          }
 58  
 59          public static void Ldc(EmitterContext context)
 60          {
 61              InstLdc op = context.GetOp<InstLdc>();
 62  
 63              if (op.LsSize > LsSize2.B64)
 64              {
 65                  context.TranslatorContext.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
 66                  return;
 67              }
 68  
 69              bool isSmallInt = op.LsSize < LsSize2.B32;
 70  
 71              int count = op.LsSize == LsSize2.B64 ? 2 : 1;
 72  
 73              Operand slot = Const(op.CbufSlot);
 74              Operand srcA = GetSrcReg(context, op.SrcA);
 75  
 76              if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
 77              {
 78                  slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
 79                  srcA = context.BitwiseAnd(srcA, Const(0xffff));
 80              }
 81  
 82              Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
 83              Operand wordOffset = context.ShiftRightU32(addr, Const(2));
 84  
 85              for (int index = 0; index < count; index++)
 86              {
 87                  Register dest = new(op.Dest + index, RegisterType.Gpr);
 88  
 89                  if (dest.IsRZ)
 90                  {
 91                      break;
 92                  }
 93  
 94                  Operand offset = context.IAdd(wordOffset, Const(index));
 95                  Operand value = EmitLoadConstant(context, slot, offset);
 96  
 97                  if (isSmallInt)
 98                  {
 99                      value = ExtractSmallInt(context, (LsSize)op.LsSize, GetBitOffset(context, addr), value);
100                  }
101  
102                  context.Copy(Register(dest), value);
103              }
104          }
105  
106          public static void Ldg(EmitterContext context)
107          {
108              InstLdg op = context.GetOp<InstLdg>();
109  
110              EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
111          }
112  
113          public static void Ldl(EmitterContext context)
114          {
115              InstLdl op = context.GetOp<InstLdl>();
116  
117              EmitLoad(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
118          }
119  
120          public static void Lds(EmitterContext context)
121          {
122              if (context.TranslatorContext.Definitions.Stage != ShaderStage.Compute)
123              {
124                  context.TranslatorContext.GpuAccessor.Log($"Lds instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
125                  return;
126              }
127  
128              InstLds op = context.GetOp<InstLds>();
129  
130              EmitLoad(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
131          }
132  
133          public static void Red(EmitterContext context)
134          {
135              InstRed op = context.GetOp<InstRed>();
136  
137              (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, op.Imm20);
138  
139              EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
140          }
141  
142          public static void Stg(EmitterContext context)
143          {
144              InstStg op = context.GetOp<InstStg>();
145  
146              EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
147          }
148  
149          public static void Stl(EmitterContext context)
150          {
151              InstStl op = context.GetOp<InstStl>();
152  
153              EmitStore(context, StorageKind.LocalMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
154          }
155  
156          public static void Sts(EmitterContext context)
157          {
158              if (context.TranslatorContext.Definitions.Stage != ShaderStage.Compute)
159              {
160                  context.TranslatorContext.GpuAccessor.Log($"Sts instruction is not valid on \"{context.TranslatorContext.Definitions.Stage}\" stage.");
161                  return;
162              }
163  
164              InstSts op = context.GetOp<InstSts>();
165  
166              EmitStore(context, StorageKind.SharedMemory, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
167          }
168  
169          private static Operand EmitLoadConstant(EmitterContext context, Operand slot, Operand offset)
170          {
171              Operand vecIndex = context.ShiftRightU32(offset, Const(2));
172              Operand elemIndex = context.BitwiseAnd(offset, Const(3));
173  
174              if (slot.Type == OperandType.Constant)
175              {
176                  int binding = context.ResourceManager.GetConstantBufferBinding(slot.Value);
177                  return context.Load(StorageKind.ConstantBuffer, binding, Const(0), vecIndex, elemIndex);
178              }
179              else
180              {
181                  Operand value = Const(0);
182  
183                  uint cbUseMask = context.TranslatorContext.GpuAccessor.QueryConstantBufferUse();
184  
185                  while (cbUseMask != 0)
186                  {
187                      int cbIndex = BitOperations.TrailingZeroCount(cbUseMask);
188                      int binding = context.ResourceManager.GetConstantBufferBinding(cbIndex);
189  
190                      Operand isCurrent = context.ICompareEqual(slot, Const(cbIndex));
191                      Operand currentValue = context.Load(StorageKind.ConstantBuffer, binding, Const(0), vecIndex, elemIndex);
192  
193                      value = context.ConditionalSelect(isCurrent, currentValue, value);
194  
195                      cbUseMask &= ~(1u << cbIndex);
196                  }
197  
198                  return value;
199              }
200          }
201  
202          private static Operand EmitAtomicOp(
203              EmitterContext context,
204              StorageKind storageKind,
205              AtomOp op,
206              AtomSize type,
207              Operand e0,
208              Operand e1,
209              Operand value)
210          {
211              Operand res = Const(0);
212  
213              switch (op)
214              {
215                  case AtomOp.Add:
216                      if (type == AtomSize.S32 || type == AtomSize.U32)
217                      {
218                          res = context.AtomicAdd(storageKind, e0, e1, value);
219                      }
220                      else
221                      {
222                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
223                      }
224                      break;
225                  case AtomOp.Min:
226                      if (type == AtomSize.S32)
227                      {
228                          res = context.AtomicMinS32(storageKind, e0, e1, value);
229                      }
230                      else if (type == AtomSize.U32)
231                      {
232                          res = context.AtomicMinU32(storageKind, e0, e1, value);
233                      }
234                      else
235                      {
236                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
237                      }
238                      break;
239                  case AtomOp.Max:
240                      if (type == AtomSize.S32)
241                      {
242                          res = context.AtomicMaxS32(storageKind, e0, e1, value);
243                      }
244                      else if (type == AtomSize.U32)
245                      {
246                          res = context.AtomicMaxU32(storageKind, e0, e1, value);
247                      }
248                      else
249                      {
250                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
251                      }
252                      break;
253                  case AtomOp.And:
254                      if (type == AtomSize.S32 || type == AtomSize.U32)
255                      {
256                          res = context.AtomicAnd(storageKind, e0, e1, value);
257                      }
258                      else
259                      {
260                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
261                      }
262                      break;
263                  case AtomOp.Or:
264                      if (type == AtomSize.S32 || type == AtomSize.U32)
265                      {
266                          res = context.AtomicOr(storageKind, e0, e1, value);
267                      }
268                      else
269                      {
270                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
271                      }
272                      break;
273                  case AtomOp.Xor:
274                      if (type == AtomSize.S32 || type == AtomSize.U32)
275                      {
276                          res = context.AtomicXor(storageKind, e0, e1, value);
277                      }
278                      else
279                      {
280                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
281                      }
282                      break;
283                  case AtomOp.Exch:
284                      if (type == AtomSize.S32 || type == AtomSize.U32)
285                      {
286                          res = context.AtomicSwap(storageKind, e0, e1, value);
287                      }
288                      else
289                      {
290                          context.TranslatorContext.GpuAccessor.Log($"Invalid reduction type: {type}.");
291                      }
292                      break;
293                  default:
294                      context.TranslatorContext.GpuAccessor.Log($"Invalid atomic operation: {op}.");
295                      break;
296              }
297  
298              return res;
299          }
300  
301          private static void EmitLoad(
302              EmitterContext context,
303              StorageKind storageKind,
304              LsSize2 size,
305              Operand srcA,
306              int rd,
307              int offset)
308          {
309              if (size > LsSize2.B128)
310              {
311                  context.TranslatorContext.GpuAccessor.Log($"Invalid load size: {size}.");
312                  return;
313              }
314  
315              int id = storageKind == StorageKind.LocalMemory
316                  ? context.ResourceManager.LocalMemoryId
317                  : context.ResourceManager.SharedMemoryId;
318              bool isSmallInt = size < LsSize2.B32;
319  
320              int count = size switch
321              {
322                  LsSize2.B64 => 2,
323                  LsSize2.B128 => 4,
324                  _ => 1,
325              };
326  
327              Operand baseOffset = context.Copy(srcA);
328  
329              for (int index = 0; index < count; index++)
330              {
331                  Register dest = new(rd + index, RegisterType.Gpr);
332  
333                  if (dest.IsRZ)
334                  {
335                      break;
336                  }
337  
338                  Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
339                  Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
340                  Operand bitOffset = GetBitOffset(context, byteOffset);
341                  Operand value = context.Load(storageKind, id, wordOffset);
342  
343                  if (isSmallInt)
344                  {
345                      value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
346                  }
347  
348                  context.Copy(Register(dest), value);
349              }
350          }
351  
352          private static void EmitLdg(
353              EmitterContext context,
354              LsSize size,
355              int ra,
356              int rd,
357              int offset,
358              bool extended)
359          {
360              int count = GetVectorCount(size);
361              StorageKind storageKind = GetStorageKind(size);
362  
363              (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
364  
365              Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
366  
367              for (int index = 0; index < count; index++)
368              {
369                  Register dest = new(rd + index, RegisterType.Gpr);
370  
371                  if (dest.IsRZ)
372                  {
373                      break;
374                  }
375  
376                  Operand value = context.Load(storageKind, context.IAdd(srcA, Const(offset + index * 4)), addrHigh);
377  
378                  context.Copy(Register(dest), value);
379              }
380          }
381  
382          private static void EmitStore(
383              EmitterContext context,
384              StorageKind storageKind,
385              LsSize2 size,
386              Operand srcA,
387              int rd,
388              int offset)
389          {
390              if (size > LsSize2.B128)
391              {
392                  context.TranslatorContext.GpuAccessor.Log($"Invalid store size: {size}.");
393                  return;
394              }
395  
396              int id = storageKind == StorageKind.LocalMemory
397                  ? context.ResourceManager.LocalMemoryId
398                  : context.ResourceManager.SharedMemoryId;
399              bool isSmallInt = size < LsSize2.B32;
400  
401              int count = size switch
402              {
403                  LsSize2.B64 => 2,
404                  LsSize2.B128 => 4,
405                  _ => 1,
406              };
407  
408              Operand baseOffset = context.Copy(srcA);
409  
410              for (int index = 0; index < count; index++)
411              {
412                  bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
413  
414                  Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
415                  Operand byteOffset = context.IAdd(baseOffset, Const(offset + index * 4));
416                  Operand wordOffset = context.ShiftRightU32(byteOffset, Const(2));
417                  Operand bitOffset = GetBitOffset(context, byteOffset);
418  
419                  if (isSmallInt && storageKind == StorageKind.LocalMemory)
420                  {
421                      Operand word = context.Load(storageKind, id, wordOffset);
422  
423                      value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
424                  }
425  
426                  if (storageKind == StorageKind.LocalMemory)
427                  {
428                      context.Store(storageKind, id, wordOffset, value);
429                  }
430                  else if (storageKind == StorageKind.SharedMemory)
431                  {
432                      switch (size)
433                      {
434                          case LsSize2.U8:
435                          case LsSize2.S8:
436                              context.Store(StorageKind.SharedMemory8, id, byteOffset, value);
437                              break;
438                          case LsSize2.U16:
439                          case LsSize2.S16:
440                              context.Store(StorageKind.SharedMemory16, id, byteOffset, value);
441                              break;
442                          default:
443                              context.Store(storageKind, id, wordOffset, value);
444                              break;
445                      }
446                  }
447              }
448          }
449  
450          private static void EmitStg(
451              EmitterContext context,
452              LsSize2 size,
453              int ra,
454              int rd,
455              int offset,
456              bool extended)
457          {
458              if (size > LsSize2.B128)
459              {
460                  context.TranslatorContext.GpuAccessor.Log($"Invalid store size: {size}.");
461                  return;
462              }
463  
464              int count = GetVectorCount((LsSize)size);
465              StorageKind storageKind = GetStorageKind((LsSize)size);
466  
467              (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
468  
469              Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr)));
470  
471              for (int index = 0; index < count; index++)
472              {
473                  bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
474  
475                  Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
476  
477                  Operand addrLowOffset = context.IAdd(srcA, Const(offset + index * 4));
478  
479                  context.Store(storageKind, addrLowOffset, addrHigh, value);
480              }
481          }
482  
483          private static StorageKind GetStorageKind(LsSize size)
484          {
485              return size switch
486              {
487                  LsSize.U8 => StorageKind.GlobalMemoryU8,
488                  LsSize.S8 => StorageKind.GlobalMemoryS8,
489                  LsSize.U16 => StorageKind.GlobalMemoryU16,
490                  LsSize.S16 => StorageKind.GlobalMemoryS16,
491                  _ => StorageKind.GlobalMemory,
492              };
493          }
494  
495          private static int GetVectorCount(LsSize size)
496          {
497              return size switch
498              {
499                  LsSize.B64 => 2,
500                  LsSize.B128 or LsSize.UB128 => 4,
501                  _ => 1,
502              };
503          }
504  
505          private static (Operand, Operand) Get40BitsAddress(
506              EmitterContext context,
507              Register ra,
508              bool extended,
509              int offset)
510          {
511              Operand addrLow = Register(ra);
512              Operand addrHigh;
513  
514              if (extended && !ra.IsRZ)
515              {
516                  addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
517              }
518              else
519              {
520                  addrHigh = Const(0);
521              }
522  
523              Operand offs = Const(offset);
524  
525              addrLow = context.IAdd(addrLow, offs);
526  
527              if (extended)
528              {
529                  Operand carry = context.ICompareLessUnsigned(addrLow, offs);
530  
531                  addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
532              }
533  
534              return (addrLow, addrHigh);
535          }
536  
537          private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
538          {
539              // Note: bit offset = (baseOffset & 0b11) * 8.
540              // Addresses should be always aligned to the integer type,
541              // so we don't need to take unaligned addresses into account.
542              return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
543          }
544  
545          private static Operand ExtractSmallInt(
546              EmitterContext context,
547              LsSize size,
548              Operand bitOffset,
549              Operand value)
550          {
551              value = context.ShiftRightU32(value, bitOffset);
552  
553              switch (size)
554              {
555                  case LsSize.U8:
556                      value = ZeroExtendTo32(context, value, 8);
557                      break;
558                  case LsSize.U16:
559                      value = ZeroExtendTo32(context, value, 16);
560                      break;
561                  case LsSize.S8:
562                      value = SignExtendTo32(context, value, 8);
563                      break;
564                  case LsSize.S16:
565                      value = SignExtendTo32(context, value, 16);
566                      break;
567              }
568  
569              return value;
570          }
571  
572          private static Operand InsertSmallInt(
573              EmitterContext context,
574              LsSize size,
575              Operand bitOffset,
576              Operand word,
577              Operand value)
578          {
579              switch (size)
580              {
581                  case LsSize.U8:
582                  case LsSize.S8:
583                      value = context.BitwiseAnd(value, Const(0xff));
584                      value = context.BitfieldInsert(word, value, bitOffset, Const(8));
585                      break;
586  
587                  case LsSize.U16:
588                  case LsSize.S16:
589                      value = context.BitwiseAnd(value, Const(0xffff));
590                      value = context.BitfieldInsert(word, value, bitOffset, Const(16));
591                      break;
592              }
593  
594              return value;
595          }
596      }
597  }