From 76b53e018a2e867899dbce2f3ce5173bbc4eed22 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Fri, 20 Oct 2023 14:05:09 +0100 Subject: [PATCH] GPU: Add fallback when textureGatherOffsets is not supported (#5792) * GPU: Add fallback when textureGatherOffsets is not supported. This PR adds a fallback for GPUs or APIs that don't support an equivalent to the method `textureGatherOffsets`, where each of the 4 gathered texels has an individual offset. This is done by reusing the existing code to handle non-const offsets for texture instructions, though it has also been corrected as there were a few implementation issues. MoltenVK reports support for this capability, and it didn't error when we initially released the macOS build, but that has since changed. MVK still reports support, but spirv-cross has been fixed in a way that it _attempts_ to use this capability, but the Metal compiler errors since it doesn't exist. Some other fixes: - textureGatherOffsets emulation has been changed significantly. It now uses 4 texture sample instructions (not gather), calculates a base texel (i=0 j=0) and adds the offsets onto it before converting into a tex coord. The final result is offset into a texel center, so it shouldn't be subject to interpolation, though this isn't perfect and could have some error with floating point formats with linear sampling. It is subject to texture wrap mode as it should be, which is why texelFetch was not used. - Maybe gather should be used here with component `w` (i=0, j=0), though this multiplies the number of texels fetched by 4... The way it was doing this before _was_ wrong, but doing it right would avoid issues with texel center precision. - textureGatherOffset (singular) now performs textureGather with the offset applied to the coords, rather than the slower fallback where each texel is fetched individually. * Increment shader cache version, remove unused arg * Use base texture size for gather coord offset. Implicit LOD for gather is not supported.
* Use 4 texture gathers for offsets emulation Avoids issues with interpolation at cost of performance (not sure how bad this is) * Address Feedback --- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 + .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../Shader/GpuAccessorBase.cs | 2 + src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 1 + src/Ryujinx.Graphics.Shader/IGpuAccessor.cs | 9 +++ .../Translation/Transforms/TexturePass.cs | 64 ++++++++++++++++--- src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 1 + 7 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index 7564220490..8959bf93e4 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -38,6 +38,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsShaderBallot; public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; + public readonly bool SupportsTextureGatherOffsets; public readonly bool SupportsTextureShadowLod; public readonly bool SupportsVertexStoreAndAtomics; public readonly bool SupportsViewportIndexVertexTessellation; @@ -92,6 +93,7 @@ namespace Ryujinx.Graphics.GAL bool supportsShaderBallot, bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, + bool supportsTextureGatherOffsets, bool supportsTextureShadowLod, bool supportsVertexStoreAndAtomics, bool supportsViewportIndexVertexTessellation, @@ -142,6 +144,7 @@ namespace Ryujinx.Graphics.GAL SupportsShaderBallot = supportsShaderBallot; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; + SupportsTextureGatherOffsets = supportsTextureGatherOffsets; SupportsTextureShadowLod = supportsTextureShadowLod; SupportsVertexStoreAndAtomics = supportsVertexStoreAndAtomics; SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation; diff --git 
a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 0f1aa6a967..0dc4b1a722 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 5767; + private const uint CodeGenVersion = 5791; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 9d030cd60d..a5b31363b3 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -186,6 +186,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat; + public bool QueryHostSupportsTextureGatherOffsets() => _context.Capabilities.SupportsTextureGatherOffsets; + public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod; public bool QueryHostSupportsTransformFeedback() => _context.Capabilities.SupportsTransformFeedback; diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 3eba15e342..667ea78250 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -163,6 +163,7 @@ namespace Ryujinx.Graphics.OpenGL supportsShaderBallot: HwCapabilities.SupportsShaderBallot, supportsShaderBarrierDivergence: !(intelWindows || intelUnix), supportsShaderFloat64: true, + 
supportsTextureGatherOffsets: true, supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, supportsVertexStoreAndAtomics: true, supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray, diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 4dc75a3e1b..29a5435e31 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -339,6 +339,15 @@ namespace Ryujinx.Graphics.Shader return true; } + /// <summary> + /// Queries host GPU texture gather with multiple offsets support. + /// </summary> + /// <returns>True if the GPU and driver supports texture gather offsets, false otherwise</returns> + bool QueryHostSupportsTextureGatherOffsets() + { + return true; + } + /// <summary> /// Queries host GPU texture shadow LOD support. /// </summary> diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs index dbfe6269e8..495ea8a94c 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs @@ -303,7 +303,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; - bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset(); + bool needsOffsetsEmulation = hasOffsets && !gpuAccessor.QueryHostSupportsTextureGatherOffsets(); + + bool hasInvalidOffset = needsOffsetsEmulation || ((hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset()); bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; @@ -402,11 +404,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms offsets[index] = offset; } - hasInvalidOffset &= !areAllOffsetsConstant; - - if (!hasInvalidOffset) + if (!needsOffsetsEmulation) { - return node; +
hasInvalidOffset &= !areAllOffsetsConstant; + + if (!hasInvalidOffset) + { + return node; + } } if (hasLodBias) @@ -434,13 +439,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms LinkedListNode oldNode = node; - if (isGather && !isShadow) + if (isGather && !isShadow && hasOffsets) { Operand[] newSources = new Operand[sources.Length]; sources.CopyTo(newSources, 0); - Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage); + Operand[] texSizes = InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount); int destIndex = 0; @@ -455,7 +460,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms { Operand offset = Local(); - Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)]; + Operand intOffset = offsets[index + compIndex * coordsCount]; node.List.AddBefore(node, new Operation( Instruction.FP32 | Instruction.Divide, @@ -478,7 +483,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms texOp.Format, texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets), texOp.Binding, - 1, + 1 << 3, // W component: i=0, j=0 new[] { dests[destIndex++] }, newSources); @@ -502,7 +507,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms } else { - Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage); + Operand[] texSizes = isGather + ? 
InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount) + : InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage); for (int index = 0; index < coordsCount; index++) { @@ -549,6 +556,43 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms return node; } + private static Operand[] InsertTextureBaseSize( + LinkedListNode node, + TextureOperation texOp, + Operand bindlessHandle, + int coordsCount) + { + Operand[] texSizes = new Operand[coordsCount]; + + for (int index = 0; index < coordsCount; index++) + { + texSizes[index] = Local(); + + Operand[] texSizeSources; + + if (bindlessHandle != null) + { + texSizeSources = new Operand[] { bindlessHandle, Const(0) }; + } + else + { + texSizeSources = new Operand[] { Const(0) }; + } + + node.List.AddBefore(node, new TextureOperation( + Instruction.TextureQuerySize, + texOp.Type, + texOp.Format, + texOp.Flags, + texOp.Binding, + index, + new[] { texSizes[index] }, + texSizeSources)); + } + + return texSizes; + } + private static Operand[] InsertTextureLod( LinkedListNode node, TextureOperation texOp, diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index a483dc5997..ab8e613718 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -605,6 +605,7 @@ namespace Ryujinx.Graphics.Vulkan supportsShaderBallot: false, supportsShaderBarrierDivergence: Vendor != Vendor.Intel, supportsShaderFloat64: Capabilities.SupportsShaderFloat64, + supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk, supportsTextureShadowLod: false, supportsVertexStoreAndAtomics: features2.Features.VertexPipelineStoresAndAtomics, supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,