Translator.cs
  1  using Ryujinx.Graphics.Shader.Decoders;
  2  using Ryujinx.Graphics.Shader.IntermediateRepresentation;
  3  using System;
  4  using System.Linq;
  5  using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
  6  
  7  namespace Ryujinx.Graphics.Shader.Translation
  8  {
  9      public static class Translator
 10      {
 11          private const int ThreadsPerWarp = 32;
 12          private const int HeaderSize = 0x50;
 13  
 14          internal readonly struct FunctionCode
 15          {
 16              public Operation[] Code { get; }
 17  
 18              public FunctionCode(Operation[] code)
 19              {
 20                  Code = code;
 21              }
 22          }
 23  
 24          public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
 25          {
 26              return DecodeShader(address, gpuAccessor, options);
 27          }
 28  
 29          private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
 30          {
 31              int localMemorySize;
 32              ShaderDefinitions definitions;
 33              DecodedProgram program;
 34  
 35              if (options.Flags.HasFlag(TranslationFlags.Compute))
 36              {
 37                  definitions = CreateComputeDefinitions(gpuAccessor);
 38                  localMemorySize = gpuAccessor.QueryComputeLocalMemorySize();
 39  
 40                  program = Decoder.Decode(definitions, gpuAccessor, address);
 41              }
 42              else
 43              {
 44                  ShaderHeader header = new(gpuAccessor, address);
 45  
 46                  definitions = CreateGraphicsDefinitions(gpuAccessor, header);
 47                  localMemorySize = GetLocalMemorySize(header);
 48  
 49                  program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize);
 50              }
 51  
 52              ulong maxEndAddress = 0;
 53  
 54              foreach (DecodedFunction function in program)
 55              {
 56                  foreach (Block block in function.Blocks)
 57                  {
 58                      if (maxEndAddress < block.EndAddress)
 59                      {
 60                          maxEndAddress = block.EndAddress;
 61                      }
 62                  }
 63              }
 64  
 65              int size = (int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize);
 66  
 67              return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program);
 68          }
 69  
 70          private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor)
 71          {
 72              return new ShaderDefinitions(
 73                  ShaderStage.Compute,
 74                  gpuAccessor.QueryComputeLocalSizeX(),
 75                  gpuAccessor.QueryComputeLocalSizeY(),
 76                  gpuAccessor.QueryComputeLocalSizeZ());
 77          }
 78  
 79          private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header)
 80          {
 81              TransformFeedbackOutput[] transformFeedbackOutputs = GetTransformFeedbackOutputs(gpuAccessor, out ulong transformFeedbackVecMap);
 82  
 83              return new ShaderDefinitions(
 84                  header.Stage,
 85                  gpuAccessor.QueryGraphicsState(),
 86                  header.Stage == ShaderStage.Geometry && header.GpPassthrough,
 87                  header.ThreadsPerInputPrimitive,
 88                  header.OutputTopology,
 89                  header.MaxOutputVertexCount,
 90                  header.ImapTypes,
 91                  header.OmapTargets,
 92                  header.OmapSampleMask,
 93                  header.OmapDepth,
 94                  gpuAccessor.QueryHostSupportsScaledVertexFormats(),
 95                  transformFeedbackVecMap,
 96                  transformFeedbackOutputs);
 97          }
 98  
 99          internal static TransformFeedbackOutput[] GetTransformFeedbackOutputs(IGpuAccessor gpuAccessor, out ulong transformFeedbackVecMap)
100          {
101              bool transformFeedbackEnabled =
102                  gpuAccessor.QueryTransformFeedbackEnabled() &&
103                  gpuAccessor.QueryHostSupportsTransformFeedback();
104              TransformFeedbackOutput[] transformFeedbackOutputs = null;
105              transformFeedbackVecMap = 0UL;
106  
107              if (transformFeedbackEnabled)
108              {
109                  transformFeedbackOutputs = new TransformFeedbackOutput[0xc0];
110  
111                  for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++)
112                  {
113                      var locations = gpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
114                      var stride = gpuAccessor.QueryTransformFeedbackStride(tfbIndex);
115  
116                      for (int i = 0; i < locations.Length; i++)
117                      {
118                          byte wordOffset = locations[i];
119                          if (wordOffset < 0xc0)
120                          {
121                              transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride);
122                              transformFeedbackVecMap |= 1UL << (wordOffset / 4);
123                          }
124                      }
125                  }
126              }
127  
128              return transformFeedbackOutputs;
129          }
130  
131          private static int GetLocalMemorySize(ShaderHeader header)
132          {
133              return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
134          }
135  
136          internal static FunctionCode[] EmitShader(
137              TranslatorContext translatorContext,
138              ResourceManager resourceManager,
139              DecodedProgram program,
140              bool vertexAsCompute,
141              bool initializeOutputs,
142              out int initializationOperations)
143          {
144              initializationOperations = 0;
145  
146              FunctionMatch.RunPass(program);
147  
148              foreach (DecodedFunction function in program.Where(x => !x.IsCompilerGenerated).OrderBy(x => x.Address))
149              {
150                  program.AddFunctionAndSetId(function);
151              }
152  
153              FunctionCode[] functions = new FunctionCode[program.FunctionsWithIdCount];
154  
155              for (int index = 0; index < functions.Length; index++)
156              {
157                  EmitterContext context = new(translatorContext, resourceManager, program, vertexAsCompute, index != 0);
158  
159                  if (initializeOutputs && index == 0)
160                  {
161                      EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage);
162                      initializationOperations = context.OperationsCount;
163                  }
164  
165                  DecodedFunction function = program.GetFunctionById(index);
166  
167                  foreach (Block block in function.Blocks)
168                  {
169                      context.CurrBlock = block;
170  
171                      context.EnterBlock(block.Address);
172  
173                      EmitOps(context, block);
174                  }
175  
176                  functions[index] = new(context.GetOperations());
177              }
178  
179              return functions;
180          }
181  
182          private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage)
183          {
184              // Compute has no output attributes, and fragment is the last stage, so we
185              // don't need to initialize outputs on those stages.
186              if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
187              {
188                  return;
189              }
190  
191              if (stage == ShaderStage.Vertex)
192              {
193                  InitializeVertexOutputs(context);
194              }
195  
196              UInt128 usedAttributes = context.TranslatorContext.AttributeUsage.NextInputAttributesComponents;
197              while (usedAttributes != UInt128.Zero)
198              {
199                  int index = (int)UInt128.TrailingZeroCount(usedAttributes);
200                  int vecIndex = index / 4;
201  
202                  usedAttributes &= ~(UInt128.One << index);
203  
204                  // We don't need to initialize passthrough attributes.
205                  if ((context.TranslatorContext.AttributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0)
206                  {
207                      continue;
208                  }
209  
210                  InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
211              }
212  
213              if (context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch != null)
214              {
215                  foreach (int vecIndex in context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch.Order())
216                  {
217                      InitializeOutput(context, vecIndex, perPatch: true);
218                  }
219              }
220  
221              if (attributeUsage.NextUsesFixedFuncAttributes)
222              {
223                  bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation();
224                  int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;
225  
226                  for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
227                  {
228                      int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i);
229                      if (index < 0)
230                      {
231                          break;
232                      }
233  
234                      InitializeOutput(context, index, perPatch: false);
235                  }
236              }
237          }
238  
239          private static void InitializeVertexOutputs(EmitterContext context)
240          {
241              for (int c = 0; c < 4; c++)
242              {
243                  context.Store(StorageKind.Output, IoVariable.Position, null, Const(c), ConstF(c == 3 ? 1f : 0f));
244              }
245  
246              if (context.Program.ClipDistancesWritten != 0)
247              {
248                  for (int i = 0; i < 8; i++)
249                  {
250                      context.Store(StorageKind.Output, IoVariable.ClipDistance, null, Const(i), ConstF(0f));
251                  }
252              }
253          }
254  
255          private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
256          {
257              for (int c = 0; c < 4; c++)
258              {
259                  InitializeOutputComponent(context, location, c, perPatch);
260              }
261          }
262  
263          private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
264          {
265              StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
266  
267              if (context.TranslatorContext.Definitions.OaIndexing)
268              {
269                  Operand invocationId = null;
270  
271                  if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
272                  {
273                      invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
274                  }
275  
276                  int index = location * 4 + c;
277  
278                  context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(c == 3 ? 1f : 0f));
279              }
280              else
281              {
282                  if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
283                  {
284                      Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
285                      context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
286                  }
287                  else
288                  {
289                      context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(c == 3 ? 1f : 0f));
290                  }
291              }
292          }
293  
294          private static void EmitOps(EmitterContext context, Block block)
295          {
296              for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
297              {
298                  InstOp op = block.OpCodes[opIndex];
299  
300                  if (context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode))
301                  {
302                      string instName;
303  
304                      if (op.Emitter != null)
305                      {
306                          instName = op.Name.ToString();
307                      }
308                      else
309                      {
310                          instName = "???";
311  
312                          context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
313                      }
314  
315                      string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
316  
317                      context.Add(new CommentNode(dbgComment));
318                  }
319  
320                  InstConditional opConditional = new(op.RawOpCode);
321  
322                  bool noPred = op.Props.HasFlag(InstProps.NoPred);
323                  if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv)
324                  {
325                      continue;
326                  }
327  
328                  Operand predSkipLbl = null;
329  
330                  if (Decoder.IsPopBranch(op.Name))
331                  {
332                      // If the instruction is a SYNC or BRK instruction with only one
333                      // possible target address, then the instruction is basically
334                      // just a simple branch, we can generate code similar to branch
335                      // instructions, with the condition check on the branch itself.
336                      noPred = block.SyncTargets.Count <= 1;
337                  }
338                  else if (op.Name == InstName.Bra)
339                  {
340                      noPred = true;
341                  }
342  
343                  if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred))
344                  {
345                      Operand label;
346  
347                      if (opIndex == block.OpCodes.Count - 1 && block.HasNext())
348                      {
349                          label = context.GetLabel(block.Successors[0].Address);
350                      }
351                      else
352                      {
353                          label = Label();
354  
355                          predSkipLbl = label;
356                      }
357  
358                      Operand pred = Register(opConditional.Pred, RegisterType.Predicate);
359  
360                      if (opConditional.PredInv)
361                      {
362                          context.BranchIfTrue(label, pred);
363                      }
364                      else
365                      {
366                          context.BranchIfFalse(label, pred);
367                      }
368                  }
369  
370                  context.CurrOp = op;
371  
372                  op.Emitter?.Invoke(context);
373  
374                  if (predSkipLbl != null)
375                  {
376                      context.MarkLabel(predSkipLbl);
377                  }
378              }
379          }
380      }
381  }