Translator.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Decodes guest shader code into a <see cref="TranslatorContext"/> and emits the
    /// intermediate representation operations for each decoded function.
    /// </summary>
    public static class Translator
    {
        private const int ThreadsPerWarp = 32;
        private const int HeaderSize = 0x50;

        // Number of transform feedback buffers queried from the GPU accessor.
        private const int TransformFeedbackBufferCount = 4;

        // Length of the transform feedback output table; word offsets at or above this value are ignored.
        private const int TransformFeedbackOutputCount = 0xc0;

        // Number of clip distance elements zero-initialized on vertex shaders.
        private const int ClipDistanceCount = 8;

        /// <summary>
        /// Operations emitted for a single shader function.
        /// </summary>
        internal readonly struct FunctionCode
        {
            /// <summary>
            /// Operations that make up the function body.
            /// </summary>
            public Operation[] Code { get; }

            /// <summary>
            /// Creates a new function code container.
            /// </summary>
            /// <param name="code">Operations that make up the function body</param>
            public FunctionCode(Operation[] code)
            {
                Code = code;
            }
        }

        /// <summary>
        /// Decodes the shader located at <paramref name="address"/> and creates a translator context for it.
        /// </summary>
        /// <param name="address">Address of the shader; for non-compute shaders the shader header is read from here</param>
        /// <param name="gpuAccessor">Accessor used to read the shader and query state</param>
        /// <param name="options">Translation options</param>
        /// <returns>Translator context for the decoded shader</returns>
        public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            return DecodeShader(address, gpuAccessor, options);
        }

        /// <summary>
        /// Decodes a shader and gathers the definitions, local memory size and code size needed to
        /// build its <see cref="TranslatorContext"/>.
        /// </summary>
        /// <param name="address">Address of the shader; for non-compute shaders the shader header is read from here</param>
        /// <param name="gpuAccessor">Accessor used to read the shader and query state</param>
        /// <param name="options">Translation options; the <see cref="TranslationFlags.Compute"/> flag selects the compute path</param>
        /// <returns>Translator context for the decoded shader</returns>
        private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            bool isCompute = options.Flags.HasFlag(TranslationFlags.Compute);

            int localMemorySize;
            ShaderDefinitions definitions;
            DecodedProgram program;

            if (isCompute)
            {
                definitions = CreateComputeDefinitions(gpuAccessor);
                localMemorySize = gpuAccessor.QueryComputeLocalMemorySize();

                program = Decoder.Decode(definitions, gpuAccessor, address);
            }
            else
            {
                ShaderHeader header = new(gpuAccessor, address);

                definitions = CreateGraphicsDefinitions(gpuAccessor, header);
                localMemorySize = GetLocalMemorySize(header);

                // On non-compute shaders, decoding starts right after the header.
                program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize);
            }

            ulong maxEndAddress = 0;

            foreach (DecodedFunction function in program)
            {
                foreach (Block block in function.Blocks)
                {
                    if (maxEndAddress < block.EndAddress)
                    {
                        maxEndAddress = block.EndAddress;
                    }
                }
            }

            // The reported size is the highest block end address, plus the header size when a header is present.
            int size = (int)maxEndAddress + (isCompute ? 0 : HeaderSize);

            return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program);
        }

        /// <summary>
        /// Creates the shader definitions for a compute shader, using the local sizes reported by the GPU accessor.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to query the compute local sizes</param>
        /// <returns>Compute shader definitions</returns>
        private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor)
        {
            return new ShaderDefinitions(
                ShaderStage.Compute,
                gpuAccessor.QueryComputeLocalSizeX(),
                gpuAccessor.QueryComputeLocalSizeY(),
                gpuAccessor.QueryComputeLocalSizeZ());
        }

        /// <summary>
        /// Creates the shader definitions for a graphics shader from its header, the graphics state
        /// and the transform feedback configuration.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to query graphics state and host capabilities</param>
        /// <param name="header">Header of the shader being decoded</param>
        /// <returns>Graphics shader definitions</returns>
        private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header)
        {
            TransformFeedbackOutput[] transformFeedbackOutputs = GetTransformFeedbackOutputs(gpuAccessor, out ulong transformFeedbackVecMap);

            return new ShaderDefinitions(
                header.Stage,
                gpuAccessor.QueryGraphicsState(),
                header.Stage == ShaderStage.Geometry && header.GpPassthrough,
                header.ThreadsPerInputPrimitive,
                header.OutputTopology,
                header.MaxOutputVertexCount,
                header.ImapTypes,
                header.OmapTargets,
                header.OmapSampleMask,
                header.OmapDepth,
                gpuAccessor.QueryHostSupportsScaledVertexFormats(),
                transformFeedbackVecMap,
                transformFeedbackOutputs);
        }

        /// <summary>
        /// Builds the transform feedback output table from the varying locations and strides of each buffer.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to query the transform feedback state</param>
        /// <param name="transformFeedbackVecMap">Bit mask with bit (word offset / 4) set for every recorded output; zero when disabled</param>
        /// <returns>
        /// Table indexed by word offset, or null when transform feedback is disabled or not supported by the host
        /// </returns>
        internal static TransformFeedbackOutput[] GetTransformFeedbackOutputs(IGpuAccessor gpuAccessor, out ulong transformFeedbackVecMap)
        {
            bool transformFeedbackEnabled =
                gpuAccessor.QueryTransformFeedbackEnabled() &&
                gpuAccessor.QueryHostSupportsTransformFeedback();

            TransformFeedbackOutput[] transformFeedbackOutputs = null;
            transformFeedbackVecMap = 0UL;

            if (!transformFeedbackEnabled)
            {
                return transformFeedbackOutputs;
            }

            transformFeedbackOutputs = new TransformFeedbackOutput[TransformFeedbackOutputCount];

            for (int tfbIndex = 0; tfbIndex < TransformFeedbackBufferCount; tfbIndex++)
            {
                var locations = gpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
                var stride = gpuAccessor.QueryTransformFeedbackStride(tfbIndex);

                for (int i = 0; i < locations.Length; i++)
                {
                    byte wordOffset = locations[i];

                    // Locations outside of the table are skipped.
                    if (wordOffset < TransformFeedbackOutputCount)
                    {
                        transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride);
                        transformFeedbackVecMap |= 1UL << (wordOffset / 4);
                    }
                }
            }

            return transformFeedbackOutputs;
        }

        /// <summary>
        /// Computes the local memory size from the sizes stored on the shader header.
        /// </summary>
        /// <param name="header">Header of the shader</param>
        /// <returns>Low size plus high size plus the CRS size divided by the number of threads per warp</returns>
        private static int GetLocalMemorySize(ShaderHeader header)
        {
            return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
        }

        /// <summary>
        /// Emits the operations for every function of the decoded program.
        /// </summary>
        /// <param name="translatorContext">Translator context of the shader</param>
        /// <param name="resourceManager">Resource manager passed to each emitter context</param>
        /// <param name="program">Decoded program to emit code for</param>
        /// <param name="vertexAsCompute">Value forwarded to each emitter context</param>
        /// <param name="initializeOutputs">True to emit output initialization at the start of the first function</param>
        /// <param name="initializationOperations">Number of operations emitted by the output initialization, zero if none</param>
        /// <returns>Emitted code, one entry per function ID</returns>
        internal static FunctionCode[] EmitShader(
            TranslatorContext translatorContext,
            ResourceManager resourceManager,
            DecodedProgram program,
            bool vertexAsCompute,
            bool initializeOutputs,
            out int initializationOperations)
        {
            initializationOperations = 0;

            FunctionMatch.RunPass(program);

            // IDs are assigned to the non compiler generated functions in address order.
            foreach (DecodedFunction function in program.Where(x => !x.IsCompilerGenerated).OrderBy(x => x.Address))
            {
                program.AddFunctionAndSetId(function);
            }

            FunctionCode[] functions = new FunctionCode[program.FunctionsWithIdCount];

            for (int index = 0; index < functions.Length; index++)
            {
                EmitterContext context = new(translatorContext, resourceManager, program, vertexAsCompute, index != 0);

                // Output initialization is only emitted on the first function.
                if (initializeOutputs && index == 0)
                {
                    EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage);
                    initializationOperations = context.OperationsCount;
                }

                DecodedFunction function = program.GetFunctionById(index);

                foreach (Block block in function.Blocks)
                {
                    context.CurrBlock = block;

                    context.EnterBlock(block.Address);

                    EmitOps(context, block);
                }

                functions[index] = new(context.GetOperations());
            }

            return functions;
        }

        /// <summary>
        /// Emits stores that give a default value to the outputs of the shader.
        /// </summary>
        /// <param name="context">Emitter context that receives the stores</param>
        /// <param name="attributeUsage">Attribute usage information used to select which outputs are initialized</param>
        /// <param name="gpuAccessor">Accessor used to query host capabilities</param>
        /// <param name="stage">Stage of the shader being emitted</param>
        private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage)
        {
            // Compute has no output attributes, and fragment is the last stage, so we
            // don't need to initialize outputs on those stages.
            if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
            {
                return;
            }

            if (stage == ShaderStage.Vertex)
            {
                InitializeVertexOutputs(context);
            }

            // Each set bit is one component, with 4 components per vector.
            UInt128 usedAttributes = attributeUsage.NextInputAttributesComponents;
            while (usedAttributes != UInt128.Zero)
            {
                int index = (int)UInt128.TrailingZeroCount(usedAttributes);
                int vecIndex = index / 4;

                usedAttributes &= ~(UInt128.One << index);

                // We don't need to initialize passthrough attributes.
                if ((attributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0)
                {
                    continue;
                }

                InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
            }

            if (attributeUsage.NextUsedInputAttributesPerPatch != null)
            {
                foreach (int vecIndex in attributeUsage.NextUsedInputAttributesPerPatch.Order())
                {
                    InitializeOutput(context, vecIndex, perPatch: true);
                }
            }

            if (attributeUsage.NextUsesFixedFuncAttributes)
            {
                bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation();
                int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;

                for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
                {
                    int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i);

                    // A negative index stops the initialization of the remaining attributes.
                    if (index < 0)
                    {
                        break;
                    }

                    InitializeOutput(context, index, perPatch: false);
                }
            }
        }

        /// <summary>
        /// Emits stores that set the output position to (0, 0, 0, 1) and, when the program
        /// writes clip distances, set the clip distances to zero.
        /// </summary>
        /// <param name="context">Emitter context that receives the stores</param>
        private static void InitializeVertexOutputs(EmitterContext context)
        {
            for (int c = 0; c < 4; c++)
            {
                context.Store(StorageKind.Output, IoVariable.Position, null, Const(c), ConstF(c == 3 ? 1f : 0f));
            }

            if (context.Program.ClipDistancesWritten != 0)
            {
                for (int i = 0; i < ClipDistanceCount; i++)
                {
                    context.Store(StorageKind.Output, IoVariable.ClipDistance, null, Const(i), ConstF(0f));
                }
            }
        }

        /// <summary>
        /// Emits stores that initialize the 4 components of an user defined output.
        /// </summary>
        /// <param name="context">Emitter context that receives the stores</param>
        /// <param name="location">Location of the output</param>
        /// <param name="perPatch">True if the output is a per-patch output</param>
        private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
        {
            for (int c = 0; c < 4; c++)
            {
                InitializeOutputComponent(context, location, c, perPatch);
            }
        }

        /// <summary>
        /// Emits a store that initializes a single component of an user defined output.
        /// The value stored is 1 for the last component (index 3) and 0 for all others.
        /// </summary>
        /// <param name="context">Emitter context that receives the store</param>
        /// <param name="location">Location of the output</param>
        /// <param name="c">Index of the component inside the output</param>
        /// <param name="perPatch">True if the output is a per-patch output</param>
        private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
        {
            StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;

            // Non per-patch outputs on tessellation control are stored using the invocation ID.
            bool usesInvocationId =
                context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch;

            Operand invocationId = null;

            if (usesInvocationId)
            {
                invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
            }

            float initialValue = c == 3 ? 1f : 0f;

            if (context.TranslatorContext.Definitions.OaIndexing)
            {
                // With indexing, the output is addressed using a single flat component index.
                int index = location * 4 + c;

                context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(initialValue));
            }
            else if (usesInvocationId)
            {
                context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(initialValue));
            }
            else
            {
                context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(initialValue));
            }
        }

        /// <summary>
        /// Emits the operations for all the instructions of a block, wrapping predicated
        /// instructions with a conditional branch that skips them.
        /// </summary>
        /// <param name="context">Emitter context that receives the operations</param>
        /// <param name="block">Block with the instructions to be emitted</param>
        private static void EmitOps(EmitterContext context, Block block)
        {
            bool debugMode = context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode);

            for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
            {
                InstOp op = block.OpCodes[opIndex];

                if (debugMode)
                {
                    string instName;

                    if (op.Emitter != null)
                    {
                        instName = op.Name.ToString();
                    }
                    else
                    {
                        instName = "???";

                        context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
                    }

                    string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";

                    context.Add(new CommentNode(dbgComment));
                }

                InstConditional opConditional = new(op.RawOpCode);

                bool noPred = op.Props.HasFlag(InstProps.NoPred);

                // An instruction predicated by the inverted "true" predicate is skipped entirely.
                if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv)
                {
                    continue;
                }

                Operand predSkipLbl = null;

                if (Decoder.IsPopBranch(op.Name))
                {
                    // If the instruction is a SYNC or BRK instruction with only one
                    // possible target address, then the instruction is basically
                    // just a simple branch, we can generate code similar to branch
                    // instructions, with the condition check on the branch itself.
                    noPred = block.SyncTargets.Count <= 1;
                }
                else if (op.Name == InstName.Bra)
                {
                    noPred = true;
                }

                if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred))
                {
                    Operand label;

                    if (opIndex == block.OpCodes.Count - 1 && block.HasNext())
                    {
                        // The last instruction of the block can branch directly to the first
                        // successor, so no extra skip label is needed.
                        label = context.GetLabel(block.Successors[0].Address);
                    }
                    else
                    {
                        label = Label();

                        predSkipLbl = label;
                    }

                    Operand pred = Register(opConditional.Pred, RegisterType.Predicate);

                    // Branch over the instruction when the predicate check fails.
                    if (opConditional.PredInv)
                    {
                        context.BranchIfTrue(label, pred);
                    }
                    else
                    {
                        context.BranchIfFalse(label, pred);
                    }
                }

                context.CurrOp = op;

                op.Emitter?.Invoke(context);

                if (predSkipLbl != null)
                {
                    context.MarkLabel(predSkipLbl);
                }
            }
        }
    }
}