From cb171f6ebfa7e1aa5721503d1c1115719957932d Mon Sep 17 00:00:00 2001 From: gdkchan Date: Fri, 6 Dec 2019 19:37:00 -0300 Subject: [PATCH] Support shared color mask, implement more shader instructions Support shared color masks (used by Nouveau and maybe the NVIDIA driver). Support draw buffers (also required by OpenGL). Support viewport transform disable (disabled for now as it breaks some games). Fix instanced rendering draw being ignored for multi draw. Fix IADD and IADD3 immediate shader encodings, which were not matching some ops. Implement FFMA32I shader instruction. Implement IMAD shader instruction. --- Ryujinx.Graphics.GAL/Capabilities.cs | 3 + Ryujinx.Graphics.Gpu/Engine/MethodClear.cs | 4 +- Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs | 24 +++--- .../Engine/MethodUniformBufferUpdate.cs | 6 -- Ryujinx.Graphics.Gpu/Engine/Methods.cs | 82 ++++++++++++++----- Ryujinx.Graphics.Gpu/NvGpuFifo.cs | 2 + Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 33 +++++--- Ryujinx.Graphics.Gpu/State/GpuState.cs | 8 +- Ryujinx.Graphics.Gpu/State/MethodOffset.cs | 3 + Ryujinx.Graphics.Gpu/State/RtControl.cs | 17 ++++ Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 2 + Ryujinx.Graphics.OpenGL/Renderer.cs | 1 + .../CodeGen/Glsl/Declarations.cs | 13 +++ .../CodeGen/Glsl/DefaultNames.cs | 3 + .../HelperFunctions/HelperFunctionNames.cs | 3 + .../Glsl/HelperFunctions/MultiplyHighS32.glsl | 7 ++ .../Glsl/HelperFunctions/MultiplyHighU32.glsl | 7 ++ .../Glsl/Instructions/InstGenHelper.cs | 2 + .../Glsl/Instructions/InstGenMemory.cs | 1 - .../Decoders/OpCodeTable.cs | 9 +- .../Instructions/InstEmitAlu.cs | 44 ++++++++++ .../Instructions/InstEmitFArith.cs | 20 +++++ .../IntermediateRepresentation/Instruction.cs | 2 + .../Ryujinx.Graphics.Shader.csproj | 2 + Ryujinx.Graphics.Shader/ShaderCapabilities.cs | 8 +- .../StructuredIr/HelperFunctionsMask.cs | 12 +-- .../StructuredIr/InstructionInfo.cs | 2 + .../StructuredIr/StructuredProgram.cs | 6 ++ .../Translation/EmitterContext.cs | 32 ++++++-- 
.../Translation/EmitterContextInsts.cs | 10 +++ .../Translation/TranslationFlags.cs | 3 +- .../Translation/Translator.cs | 34 ++++---- 32 files changed, 313 insertions(+), 92 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/State/RtControl.cs create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl create mode 100644 Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 246722f812..4a8b7c83b0 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -4,13 +4,16 @@ namespace Ryujinx.Graphics.GAL { public bool SupportsAstcCompression { get; } + public int MaximumViewportDimensions { get; } public int StorageBufferOffsetAlignment { get; } public Capabilities( bool supportsAstcCompression, + int maximumViewportDimensions, int storageBufferOffsetAlignment) { SupportsAstcCompression = supportsAstcCompression; + MaximumViewportDimensions = maximumViewportDimensions; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; } } diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs index da6c94e57d..43f8b25db9 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodClear.cs @@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Gpu.Engine { private void Clear(GpuState state, int argument) { - UpdateRenderTargetStateIfNeeded(state); + UpdateRenderTargetState(state, useControl: false); _textureManager.CommitGraphicsBindings(); @@ -49,6 +49,8 @@ namespace Ryujinx.Graphics.Gpu.Engine stencilValue, stencilMask); } + + UpdateRenderTargetState(state, useControl: true); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs index d69b9ea03e..b7e8a64b2f 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs +++ 
b/Ryujinx.Graphics.Gpu/Engine/MethodDraw.cs @@ -10,7 +10,7 @@ namespace Ryujinx.Graphics.Gpu.Engine private int _firstIndex; private int _indexCount; - private bool _instancedHasState; + private bool _instancedDrawPending; private bool _instancedIndexed; private int _instancedFirstIndex; @@ -32,9 +32,9 @@ namespace Ryujinx.Graphics.Gpu.Engine if (instanced) { - if (!_instancedHasState) + if (!_instancedDrawPending) { - _instancedHasState = true; + _instancedDrawPending = true; _instancedIndexed = _drawIndexed; @@ -82,20 +82,22 @@ namespace Ryujinx.Graphics.Gpu.Engine private void DrawBegin(GpuState state, int argument) { - PrimitiveType type = (PrimitiveType)(argument & 0xffff); - - _context.Renderer.Pipeline.SetPrimitiveTopology(type.Convert()); - - PrimitiveType = type; - if ((argument & (1 << 26)) != 0) { _instanceIndex++; } else if ((argument & (1 << 27)) == 0) { + PerformDeferredDraws(); + _instanceIndex = 0; } + + PrimitiveType type = (PrimitiveType)(argument & 0xffff); + + _context.Renderer.Pipeline.SetPrimitiveTopology(type.Convert()); + + PrimitiveType = type; } private void SetIndexBufferCount(GpuState state, int argument) @@ -106,9 +108,9 @@ namespace Ryujinx.Graphics.Gpu.Engine public void PerformDeferredDraws() { // Perform any pending instanced draw. 
- if (_instancedHasState) + if (_instancedDrawPending) { - _instancedHasState = false; + _instancedDrawPending = false; if (_instancedIndexed) { diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs index 12d44f511b..43bab2433b 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MethodUniformBufferUpdate.cs @@ -1,4 +1,3 @@ -using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.State; namespace Ryujinx.Graphics.Gpu.Engine @@ -9,11 +8,6 @@ namespace Ryujinx.Graphics.Gpu.Engine { var uniformBuffer = state.Get(MethodOffset.UniformBufferState); - if (_context.MemoryManager.Translate(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset) == MemoryManager.BadAddress) - { - return; - } - _context.MemoryAccessor.Write(uniformBuffer.Address.Pack() + (uint)uniformBuffer.Offset, argument); state.SetUniformBufferOffset(uniformBuffer.Offset + 4); diff --git a/Ryujinx.Graphics.Gpu/Engine/Methods.cs b/Ryujinx.Graphics.Gpu/Engine/Methods.cs index 18fd7e7086..5388c86d53 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Methods.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Methods.cs @@ -86,7 +86,14 @@ namespace Ryujinx.Graphics.Gpu.Engine UpdateShaderState(state); } - UpdateRenderTargetStateIfNeeded(state); + if (state.QueryModified(MethodOffset.RtColorState, + MethodOffset.RtDepthStencilState, + MethodOffset.RtControl, + MethodOffset.RtDepthStencilSize, + MethodOffset.RtDepthStencilEnable)) + { + UpdateRenderTargetState(state, useControl: true); + } if (state.QueryModified(MethodOffset.DepthTestEnable, MethodOffset.DepthWriteEnable, @@ -155,7 +162,7 @@ namespace Ryujinx.Graphics.Gpu.Engine UpdateFaceState(state); } - if (state.QueryModified(MethodOffset.RtColorMask)) + if (state.QueryModified(MethodOffset.RtColorMaskShared, MethodOffset.RtColorMask)) { UpdateRtColorMask(state); } @@ -210,19 +217,12 @@ namespace Ryujinx.Graphics.Gpu.Engine } } - private void 
UpdateRenderTargetStateIfNeeded(GpuState state) + private void UpdateRenderTargetState(GpuState state, bool useControl) { - if (state.QueryModified(MethodOffset.RtColorState, - MethodOffset.RtDepthStencilState, - MethodOffset.RtDepthStencilSize, - MethodOffset.RtDepthStencilEnable)) - { - UpdateRenderTargetState(state); - } - } + var rtControl = state.Get(MethodOffset.RtControl); + + int count = useControl ? rtControl.UnpackCount() : Constants.TotalRenderTargets; - private void UpdateRenderTargetState(GpuState state) - { var msaaMode = state.Get(MethodOffset.RtMsaaMode); int samplesInX = msaaMode.SamplesInX(); @@ -230,9 +230,11 @@ namespace Ryujinx.Graphics.Gpu.Engine for (int index = 0; index < Constants.TotalRenderTargets; index++) { - var colorState = state.Get(MethodOffset.RtColorState, index); + int rtIndex = useControl ? rtControl.UnpackPermutationIndex(index) : index; - if (!IsRtEnabled(colorState)) + var colorState = state.Get(MethodOffset.RtColorState, rtIndex); + + if (index >= count || !IsRtEnabled(colorState)) { _textureManager.SetRenderTargetColor(index, null); @@ -292,6 +294,8 @@ namespace Ryujinx.Graphics.Gpu.Engine private void UpdateViewportTransform(GpuState state) { + bool transformEnable = GetViewportTransformEnable(state); + bool flipY = (state.Get(MethodOffset.YControl) & 1) != 0; float yFlip = flipY ? 
-1 : 1; @@ -303,13 +307,35 @@ namespace Ryujinx.Graphics.Gpu.Engine var transform = state.Get(MethodOffset.ViewportTransform, index); var extents = state.Get (MethodOffset.ViewportExtents, index); - float x = transform.TranslateX - MathF.Abs(transform.ScaleX); - float y = transform.TranslateY - MathF.Abs(transform.ScaleY); + RectangleF region; - float width = transform.ScaleX * 2; - float height = transform.ScaleY * 2 * yFlip; + if (transformEnable) + { + float x = transform.TranslateX - MathF.Abs(transform.ScaleX); + float y = transform.TranslateY - MathF.Abs(transform.ScaleY); - RectangleF region = new RectangleF(x, y, width, height); + float width = transform.ScaleX * 2; + float height = transform.ScaleY * 2 * yFlip; + + region = new RectangleF(x, y, width, height); + } + else + { + // It's not possible to fully disable viewport transform, at least with the most + // common graphics APIs, but we can effectively disable it with a dummy transform. + // The transform is defined as: xw = (width / 2) * xndc + x + (width / 2) + // By setting x to -(width / 2), we effectively remove the translation. + // By setting the width to 2, we remove the scale since 2 / 2 = 1. + // Now, the only problem is the viewport clipping, which we also can't disable. + // To prevent the values from being clipped, we multiply (-1, -1, 2, 2) by + // half the maximum supported viewport dimensions. + // This must be compensated on the shader, by dividing the vertex position + // by half the maximum viewport dimensions. 
+ float maxSize = (float)_context.Capabilities.MaximumViewportDimensions; + float halfMaxSize = (float)_context.Capabilities.MaximumViewportDimensions * 0.5f; + + region = new RectangleF(-halfMaxSize, -halfMaxSize, maxSize, maxSize * yFlip); + } viewports[index] = new Viewport( region, @@ -537,11 +563,13 @@ namespace Ryujinx.Graphics.Gpu.Engine private void UpdateRtColorMask(GpuState state) { + bool rtColorMaskShared = state.Get(MethodOffset.RtColorMaskShared); + uint[] componentMasks = new uint[Constants.TotalRenderTargets]; for (int index = 0; index < Constants.TotalRenderTargets; index++) { - var colorMask = state.Get(MethodOffset.RtColorMask, index); + var colorMask = state.Get(MethodOffset.RtColorMask, rtColorMaskShared ? 0 : index); uint componentMask = 0; @@ -634,7 +662,9 @@ namespace Ryujinx.Graphics.Gpu.Engine addressesArray[index] = baseAddress + shader.Offset; } - GraphicsShader gs = _shaderCache.GetGraphicsShader(addresses); + bool viewportTransformEnable = GetViewportTransformEnable(state); + + GraphicsShader gs = _shaderCache.GetGraphicsShader(addresses, !viewportTransformEnable); _vsUsesInstanceId = gs.Shader[0].Program.Info.UsesInstanceId; @@ -695,6 +725,14 @@ namespace Ryujinx.Graphics.Gpu.Engine _context.Renderer.Pipeline.BindProgram(gs.HostProgram); } + private bool GetViewportTransformEnable(GpuState state) + { + // FIXME: We should read ViewportTransformEnable, but it seems that some games write 0 there? 
+ // return state.Get(MethodOffset.ViewportTransformEnable) != 0; + + return true; + } + private static Target GetTarget(SamplerType type) { type &= ~(SamplerType.Indexed | SamplerType.Shadow); diff --git a/Ryujinx.Graphics.Gpu/NvGpuFifo.cs b/Ryujinx.Graphics.Gpu/NvGpuFifo.cs index 64947bf63f..6e02f39156 100644 --- a/Ryujinx.Graphics.Gpu/NvGpuFifo.cs +++ b/Ryujinx.Graphics.Gpu/NvGpuFifo.cs @@ -105,6 +105,8 @@ namespace Ryujinx.Graphics.Gpu { case NvGpuFifoMeth.WaitForIdle: { + _context.Methods.PerformDeferredDraws(); + _context.Renderer.FlushPipelines(); break; diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 8e39662d1e..6e3a42a238 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -69,7 +69,7 @@ namespace Ryujinx.Graphics.Gpu.Shader return cpShader; } - public GraphicsShader GetGraphicsShader(ShaderAddresses addresses) + public GraphicsShader GetGraphicsShader(ShaderAddresses addresses, bool dividePosXY) { bool isCached = _gpPrograms.TryGetValue(addresses, out List list); @@ -86,19 +86,28 @@ namespace Ryujinx.Graphics.Gpu.Shader GraphicsShader gpShaders = new GraphicsShader(); + TranslationFlags flags = + TranslationFlags.DebugMode | + TranslationFlags.Unspecialized; + + if (dividePosXY) + { + flags |= TranslationFlags.DividePosXY; + } + if (addresses.VertexA != 0) { - gpShaders.Shader[0] = TranslateGraphicsShader(addresses.Vertex, addresses.VertexA); + gpShaders.Shader[0] = TranslateGraphicsShader(flags, addresses.Vertex, addresses.VertexA); } else { - gpShaders.Shader[0] = TranslateGraphicsShader(addresses.Vertex); + gpShaders.Shader[0] = TranslateGraphicsShader(flags, addresses.Vertex); } - gpShaders.Shader[1] = TranslateGraphicsShader(addresses.TessControl); - gpShaders.Shader[2] = TranslateGraphicsShader(addresses.TessEvaluation); - gpShaders.Shader[3] = TranslateGraphicsShader(addresses.Geometry); - gpShaders.Shader[4] = 
TranslateGraphicsShader(addresses.Fragment); + gpShaders.Shader[1] = TranslateGraphicsShader(flags, addresses.TessControl); + gpShaders.Shader[2] = TranslateGraphicsShader(flags, addresses.TessEvaluation); + gpShaders.Shader[3] = TranslateGraphicsShader(flags, addresses.Geometry); + gpShaders.Shader[4] = TranslateGraphicsShader(flags, addresses.Fragment); BackpropQualifiers(gpShaders); @@ -218,7 +227,7 @@ namespace Ryujinx.Graphics.Gpu.Shader return new CachedShader(program, codeCached); } - private CachedShader TranslateGraphicsShader(ulong gpuVa, ulong gpuVaA = 0) + private CachedShader TranslateGraphicsShader(TranslationFlags flags, ulong gpuVa, ulong gpuVaA = 0) { if (gpuVa == 0) { @@ -227,10 +236,6 @@ namespace Ryujinx.Graphics.Gpu.Shader ShaderProgram program; - const TranslationFlags flags = - TranslationFlags.DebugMode | - TranslationFlags.Unspecialized; - int[] codeCached = null; if (gpuVaA != 0) @@ -345,7 +350,9 @@ namespace Ryujinx.Graphics.Gpu.Shader private ShaderCapabilities GetShaderCapabilities() { - return new ShaderCapabilities(_context.Capabilities.StorageBufferOffsetAlignment); + return new ShaderCapabilities( + _context.Capabilities.MaximumViewportDimensions, + _context.Capabilities.StorageBufferOffsetAlignment); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/State/GpuState.cs b/Ryujinx.Graphics.Gpu/State/GpuState.cs index 13f777c914..509f67152e 100644 --- a/Ryujinx.Graphics.Gpu/State/GpuState.cs +++ b/Ryujinx.Graphics.Gpu/State/GpuState.cs @@ -94,11 +94,17 @@ namespace Ryujinx.Graphics.Gpu.State _backingMemory[(int)MethodOffset.ViewportExtents + index * 4 + 3] = 0x3F800000; } + // Viewport transform enable. + _backingMemory[(int)MethodOffset.ViewportTransformEnable] = 1; + // Default front stencil mask. _backingMemory[0x4e7] = 0xff; // Default color mask. 
- _backingMemory[(int)MethodOffset.RtColorMask] = 0x1111; + for (int index = 0; index < Constants.TotalRenderTargets; index++) + { + _backingMemory[(int)MethodOffset.RtColorMask + index] = 0x1111; + } } public void RegisterCallback(MethodOffset offset, int count, MethodCallback callback) diff --git a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs index a560c257c3..93cd6f0636 100644 --- a/Ryujinx.Graphics.Gpu/State/MethodOffset.cs +++ b/Ryujinx.Graphics.Gpu/State/MethodOffset.cs @@ -29,8 +29,10 @@ namespace Ryujinx.Graphics.Gpu.State StencilBackMasks = 0x3d5, InvalidateTextures = 0x3dd, TextureBarrierTiled = 0x3df, + RtColorMaskShared = 0x3e4, RtDepthStencilState = 0x3f8, VertexAttribState = 0x458, + RtControl = 0x487, RtDepthStencilSize = 0x48a, SamplerIndex = 0x48d, DepthTestEnable = 0x4b3, @@ -62,6 +64,7 @@ namespace Ryujinx.Graphics.Gpu.State DepthBiasClamp = 0x61f, VertexBufferInstanced = 0x620, FaceState = 0x646, + ViewportTransformEnable = 0x64b, Clear = 0x674, RtColorMask = 0x680, ReportState = 0x6c0, diff --git a/Ryujinx.Graphics.Gpu/State/RtControl.cs b/Ryujinx.Graphics.Gpu/State/RtControl.cs new file mode 100644 index 0000000000..4c6fbc3431 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/State/RtControl.cs @@ -0,0 +1,17 @@ +namespace Ryujinx.Graphics.Gpu.State +{ + struct RtControl + { + public uint Packed; + + public int UnpackCount() + { + return (int)(Packed & 0xf); + } + + public int UnpackPermutationIndex(int index) + { + return (int)((Packed >> (4 + index * 3)) & 7); + } + } +} diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 70112a3a92..671bd5b20e 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -7,10 +7,12 @@ namespace Ryujinx.Graphics.OpenGL { private static Lazy _supportsAstcCompression = new Lazy(() => HasExtension("GL_KHR_texture_compression_astc_ldr")); + private static Lazy 
_maximumViewportDimensions = new Lazy(() => GetLimit(All.MaxViewportDims)); private static Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); public static bool SupportsAstcCompression => _supportsAstcCompression.Value; + public static int MaximumViewportDimensions => _maximumViewportDimensions.Value; public static int StorageBufferOffsetAlignment => _storageBufferOffsetAlignment.Value; private static bool HasExtension(string name) diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs b/Ryujinx.Graphics.OpenGL/Renderer.cs index c320d1504b..3007fe5ccc 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -63,6 +63,7 @@ namespace Ryujinx.Graphics.OpenGL { return new Capabilities( HwCapabilities.SupportsAstcCompression, + HwCapabilities.MaximumViewportDimensions, HwCapabilities.StorageBufferOffsetAlignment); } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index a5c8cc9a9d..85a0001b01 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -142,6 +142,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine(); } + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighU32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl"); + } + if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl"); @@ -170,6 +180,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info) { + 
context.AppendLine(GetVarTypeName(VariableType.S32) + " " + DefaultNames.DummyIntName + ";"); + context.AppendLine(GetVarTypeName(VariableType.U32) + " " + DefaultNames.DummyUintName + ";"); + foreach (AstOperand decl in info.Locals) { string name = context.OperandManager.DeclareLocal(decl); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index 4da38b2de5..90853b9f6c 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -22,6 +22,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public const string LocalMemoryName = "local_mem"; public const string SharedMemoryName = "shared_mem"; + public const string DummyIntName = "dummyInt"; + public const string DummyUintName = "dummyUint"; + public const string UndefinedName = "undef"; } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs index f1540fbfb1..21c435475f 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class HelperFunctionNames { + public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; + public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; + public static string Shuffle = "Helper_Shuffle"; public static string ShuffleDown = "Helper_ShuffleDown"; public static string ShuffleUp = "Helper_ShuffleUp"; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl new file mode 100644 index 0000000000..caad6f5696 --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl @@ -0,0 +1,7 
@@ +int Helper_MultiplyHighS32(int x, int y) +{ + int msb; + int lsb; + imulExtended(x, y, msb, lsb); + return msb; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl new file mode 100644 index 0000000000..617a925f6b --- /dev/null +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl @@ -0,0 +1,7 @@ +uint Helper_MultiplyHighU32(uint x, uint y) +{ + uint msb; + uint lsb; + umulExtended(x, y, msb, lsb); + return msb; +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index f013ee10ce..2b4ae7f195 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -93,6 +93,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Minimum, InstType.CallBinary, "min"); Add(Instruction.MinimumU32, InstType.CallBinary, "min"); Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1); + Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32); + Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32); Add(Instruction.Negate, InstType.OpUnary, "-", 0); Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt"); Add(Instruction.Return, InstType.OpNullary, "return"); diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index 5c2ea85e6e..ffed4c71ca 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -1,6 +1,5 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using 
Ryujinx.Graphics.Shader.StructuredIr; -using Ryujinx.Graphics.Shader.Translation.Optimizations; using System; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs index 599c674f34..0837e18589 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs @@ -57,6 +57,7 @@ namespace Ryujinx.Graphics.Shader.Decoders Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg)); Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf)); Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm)); + Set("000011xxxxxxxx", InstEmit.Ffma32i, typeof(OpCodeFArithImm32)); Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf)); Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg)); Set("0100110000110x", InstEmit.Flo, typeof(OpCodeAluCbuf)); @@ -102,12 +103,16 @@ namespace Ryujinx.Graphics.Shader.Decoders Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm)); Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg)); Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf)); - Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm)); + Set("0011100x00010x", InstEmit.Iadd, typeof(OpCodeAluImm)); Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32)); Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg)); Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf)); - Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); + Set("0011100x1100xx", InstEmit.Iadd3, typeof(OpCodeAluImm)); Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg)); + Set("010010100xxxxx", InstEmit.Imad, typeof(OpCodeAluCbuf)); + Set("0011010x0xxxxx", InstEmit.Imad, typeof(OpCodeAluImm)); + Set("010110100xxxxx", InstEmit.Imad, typeof(OpCodeAluReg)); + Set("010100100xxxxx", InstEmit.Imad, typeof(OpCodeAluRegCbuf)); Set("0100110000100x", InstEmit.Imnmx, 
typeof(OpCodeAluCbuf)); Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm)); Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg)); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs index 1d3a1101cb..2a8f00ccb5 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs @@ -200,6 +200,50 @@ namespace Ryujinx.Graphics.Shader.Instructions // TODO: CC, X, corner cases } + public static void Imad(EmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + bool signedA = context.CurrOp.RawOpCode.Extract(48); + bool signedB = context.CurrOp.RawOpCode.Extract(53); + bool high = context.CurrOp.RawOpCode.Extract(54); + + Operand srcA = GetSrcA(context); + Operand srcB = GetSrcB(context); + Operand srcC = GetSrcC(context); + + Operand res; + + if (high) + { + if (signedA && signedB) + { + res = context.MultiplyHighS32(srcA, srcB); + } + else + { + res = context.MultiplyHighU32(srcA, srcB); + + if (signedA) + { + res = context.IAdd(res, context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))); + } + else if (signedB) + { + res = context.IAdd(res, context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)))); + } + } + } + else + { + res = context.IMultiply(srcA, srcB); + } + + res = context.IAdd(res, srcC); + + context.Copy(GetDest(context), res); + } + public static void Imnmx(EmitterContext context) { OpCodeAlu op = (OpCodeAlu)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs index 3b8d7305b2..63d1efcbc1 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs @@ -59,6 +59,26 @@ namespace Ryujinx.Graphics.Shader.Instructions SetFPZnFlags(context, dest, op.SetCondCode); } + public static void Ffma32i(EmitterContext context) + { + IOpCodeFArith 
op = (IOpCodeFArith)context.CurrOp; + + bool saturate = op.RawOpCode.Extract(55); + bool negateA = op.RawOpCode.Extract(56); + bool negateC = op.RawOpCode.Extract(57); + + Operand srcA = context.FPNegate(GetSrcA(context), negateA); + Operand srcC = context.FPNegate(GetDest(context), negateC); + + Operand srcB = GetSrcB(context); + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC), saturate)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + public static void Fmnmx(EmitterContext context) { IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index 6acaa490c9..5f0407c28d 100644 --- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -84,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation Minimum, MinimumU32, Multiply, + MultiplyHighS32, + MultiplyHighU32, Negate, PackDouble2x32, PackHalf2x16, diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index e10d1edaf7..8715dad319 100644 --- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -1,6 +1,8 @@ + + diff --git a/Ryujinx.Graphics.Shader/ShaderCapabilities.cs b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs index 939c7c1de6..b5b459a8d8 100644 --- a/Ryujinx.Graphics.Shader/ShaderCapabilities.cs +++ b/Ryujinx.Graphics.Shader/ShaderCapabilities.cs @@ -2,14 +2,18 @@ namespace Ryujinx.Graphics.Shader { public struct ShaderCapabilities { - private static readonly ShaderCapabilities _default = new ShaderCapabilities(16); + private static readonly ShaderCapabilities _default = new ShaderCapabilities(32768, 16); public static ShaderCapabilities Default => 
_default; + public int MaximumViewportDimensions { get; } public int StorageBufferOffsetAlignment { get; } - public ShaderCapabilities(int storageBufferOffsetAlignment) + public ShaderCapabilities( + int maximumViewportDimensions, + int storageBufferOffsetAlignment) { + MaximumViewportDimensions = maximumViewportDimensions; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; } } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index e2eee78d92..53367fce14 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -5,10 +5,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr [Flags] enum HelperFunctionsMask { - Shuffle = 1 << 0, - ShuffleDown = 1 << 1, - ShuffleUp = 1 << 2, - ShuffleXor = 1 << 3, - SwizzleAdd = 1 << 4 + MultiplyHighS32 = 1 << 0, + MultiplyHighU32 = 1 << 1, + Shuffle = 1 << 2, + ShuffleDown = 1 << 3, + ShuffleUp = 1 << 4, + ShuffleXor = 1 << 5, + SwizzleAdd = 1 << 6 } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index d1874f50ff..9614b65989 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -102,6 +102,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); Add(Instruction.MinimumU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.Multiply, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar); + Add(Instruction.MultiplyHighS32, VariableType.S32, VariableType.S32, VariableType.S32); + Add(Instruction.MultiplyHighU32, VariableType.U32, VariableType.U32, VariableType.U32); Add(Instruction.Negate, VariableType.Scalar, VariableType.Scalar); Add(Instruction.PackHalf2x16, 
VariableType.U32, VariableType.F32, VariableType.F32); Add(Instruction.ReciprocalSquareRoot, VariableType.Scalar, VariableType.Scalar); diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index a85fbae3d2..504dc38676 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -171,6 +171,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr // decide which helper functions are needed on the final generated code. switch (operation.Inst) { + case Instruction.MultiplyHighS32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; + break; + case Instruction.MultiplyHighU32: + context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32; + break; case Instruction.Shuffle: context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle; break; diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 3995d43069..7ba7b697bc 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -11,18 +11,25 @@ namespace Ryujinx.Graphics.Shader.Translation public Block CurrBlock { get; set; } public OpCode CurrOp { get; set; } - private ShaderStage _stage; - - private ShaderHeader _header; + private ShaderStage _stage; + private ShaderHeader _header; + private ShaderCapabilities _capabilities; + private TranslationFlags _flags; private List _operations; private Dictionary _labels; - public EmitterContext(ShaderStage stage, ShaderHeader header) + public EmitterContext( + ShaderStage stage, + ShaderHeader header, + ShaderCapabilities capabilities, + TranslationFlags flags) { - _stage = stage; - _header = header; + _stage = stage; + _header = header; + _capabilities = capabilities; + _flags = flags; _operations = new List(); @@ -62,7 +69,18 @@ namespace 
Ryujinx.Graphics.Shader.Translation public void PrepareForReturn() { - if (_stage == ShaderStage.Fragment) + if (_stage == ShaderStage.Vertex) + { + if ((_flags & TranslationFlags.DividePosXY) != 0) + { + Operand posX = Attribute(AttributeConsts.PositionX); + Operand posY = Attribute(AttributeConsts.PositionY); + + this.Copy(posX, this.FPDivide(posX, ConstF(_capabilities.MaximumViewportDimensions / 2))); + this.Copy(posY, this.FPDivide(posY, ConstF(_capabilities.MaximumViewportDimensions / 2))); + } + } + else if (_stage == ShaderStage.Fragment) { if (_header.OmapDepth) { diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index d884cfdb34..e39d8c645a 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -476,6 +476,16 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.LoadShared, Local(), a); } + public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.MultiplyHighS32, Local(), a, b); + } + + public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.MultiplyHighU32, Local(), a, b); + } + public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b) { return context.Add(Instruction.PackHalf2x16, Local(), a, b); diff --git a/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs b/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs index 99b6107a9d..8faa43836b 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.Shader.Translation Compute = 1 << 0, DebugMode = 1 << 1, - Unspecialized = 1 << 2 + Unspecialized = 1 << 2, + DividePosXY = 1 << 3 } } \ No newline at end of file diff --git 
a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index b129be9397..69e63ae135 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -49,15 +49,9 @@ namespace Ryujinx.Graphics.Shader.Translation public static ShaderProgram Translate(Span code, ShaderCapabilities capabilities, TranslationFlags flags) { - bool compute = (flags & TranslationFlags.Compute) != 0; - bool debugMode = (flags & TranslationFlags.DebugMode) != 0; + bool compute = (flags & TranslationFlags.Compute) != 0; - Operation[] ops = DecodeShader( - code, - compute, - debugMode, - out ShaderHeader header, - out int size); + Operation[] ops = DecodeShader(code, capabilities, flags, out ShaderHeader header, out int size); ShaderStage stage; @@ -94,8 +88,8 @@ namespace Ryujinx.Graphics.Shader.Translation { bool debugMode = (flags & TranslationFlags.DebugMode) != 0; - Operation[] vpAOps = DecodeShader(vpACode, compute: false, debugMode, out _, out _); - Operation[] vpBOps = DecodeShader(vpBCode, compute: false, debugMode, out ShaderHeader header, out int sizeB); + Operation[] vpAOps = DecodeShader(vpACode, capabilities, flags, out _, out _); + Operation[] vpBOps = DecodeShader(vpBCode, capabilities, flags, out ShaderHeader header, out int sizeB); ShaderConfig config = new ShaderConfig( header.Stage, @@ -142,23 +136,23 @@ namespace Ryujinx.Graphics.Shader.Translation } private static Operation[] DecodeShader( - Span code, - bool compute, - bool debugMode, - out ShaderHeader header, - out int size) + Span code, + ShaderCapabilities capabilities, + TranslationFlags flags, + out ShaderHeader header, + out int size) { Block[] cfg; EmitterContext context; - if (compute) + if ((flags & TranslationFlags.Compute) != 0) { header = null; cfg = Decoder.Decode(code, 0); - context = new EmitterContext(ShaderStage.Compute, header); + context = new EmitterContext(ShaderStage.Compute, header, 
capabilities, flags); } else { @@ -166,7 +160,7 @@ namespace Ryujinx.Graphics.Shader.Translation cfg = Decoder.Decode(code, HeaderSize); - context = new EmitterContext(header.Stage, header); + context = new EmitterContext(header.Stage, header, capabilities, flags); } if (cfg == null) @@ -197,7 +191,7 @@ namespace Ryujinx.Graphics.Shader.Translation { OpCode op = block.OpCodes[opIndex]; - if (debugMode) + if ((flags & TranslationFlags.DebugMode) != 0) { string instName; @@ -274,7 +268,7 @@ namespace Ryujinx.Graphics.Shader.Translation } } - size = (int)maxEndAddress + (compute ? 0 : HeaderSize); + size = (int)maxEndAddress + (((flags & TranslationFlags.Compute) != 0) ? 0 : HeaderSize); return context.GetOperations(); }